1 #include "renpy.h"
2 #include "IMG_savepng.h"
3 #include <SDL.h>
4 #include <pygame_sdl2/pygame_sdl2.h>
5 #include <stdio.h>
6 #include <math.h>
7 
8 // Shows how to do this.
9 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
10 #endif
11 
/* Initializes the stuff found in this file.
 *
 * The only work done here is the call to import_pygame_sdl2(). The other
 * functions in this file use PySurface_AsSurface to reach the SDL surface
 * behind a Python object; presumably that accessor is only usable once
 * this import has run (TODO confirm against pygame_sdl2.h), so call this
 * before anything else in the file.
 */
void core_init() {
    import_pygame_sdl2();
}
17 
/* Saves a pygame surface through an SDL_RWops.
 *
 * pysurf   - The pygame surface to save.
 * rw       - The SDL_RWops the image data is written to.
 * compress - Passed unchanged to renpy_IMG_SavePNG_RW.
 *
 * Whatever renpy_IMG_SavePNG_RW returns is discarded, so a failed save is
 * not reported to the caller.
 */
void save_png_core(PyObject *pysurf, SDL_RWops *rw, int compress) {
    /* The GIL is deliberately kept: the RWops in use is not a threaded one. */
    renpy_IMG_SavePNG_RW(rw, PySurface_AsSurface(pysurf), compress);
}
26 
27 /* This pixellates a 32-bit RGBA pygame surface to a destination
28  * surface of a given size.
29  *
30  * pysrc - The source pygame surface, which must be 32-bit RGBA.
31  * pydst - The destination pygame surface, which should be 32-bit
32  * RGBA, and locked.
33  * avgwidth - The width of the pixels that will be averaged together.
34  * avgheight - The height of the pixels that will be averaged
35  * together.
36  * outwidth - The width of pixels that will be written to the output.
37  * outheight - The height of pixels that will be written to the
38  * output.
39  *
40  * We assume that pysrc and pydst have been locked before we are called.
41  */
pixellate32_core(PyObject * pysrc,PyObject * pydst,int avgwidth,int avgheight,int outwidth,int outheight)42 void pixellate32_core(PyObject *pysrc,
43                       PyObject *pydst,
44                       int avgwidth,
45                       int avgheight,
46                       int outwidth,
47                       int outheight
48     ) {
49 
50     SDL_Surface *src;
51     SDL_Surface *dst;
52 
53     int x, y, i, j;
54     Uint32 srcpitch, dstpitch;
55     Uint32 srcw, srch;
56     Uint32 dstw, dsth;
57     int vw, vh;
58 
59     unsigned char *srcpixels;
60     unsigned char *dstpixels;
61 
62     src = PySurface_AsSurface(pysrc);
63     dst = PySurface_AsSurface(pydst);
64 
65     Py_BEGIN_ALLOW_THREADS
66 
67     srcpixels = (unsigned char *) src->pixels;
68     dstpixels = (unsigned char *) dst->pixels;
69     srcpitch = src->pitch;
70     dstpitch = dst->pitch;
71     srcw = src->w;
72     dstw = dst->w;
73     srch = src->h;
74     dsth = dst->h;
75 
76     /* Compute the virtual width and height. */
77     vw = ( srcw + avgwidth - 1) / avgwidth;
78     vh = ( srch + avgheight - 1) / avgheight;
79 
80     /* Iterate through each of the virtual pixels. */
81 
82     for (y = 0; y < vh; y++) {
83         int srcy = avgheight * y;
84         int dsty = outheight * y;
85 
86         int srcylimit = srcy + avgheight;
87         int dstylimit = dsty + outheight;
88 
89         if (srcylimit > srch) {
90             srcylimit = srch;
91         }
92 
93         if (dstylimit > dsth) {
94             dstylimit = dsth;
95         }
96 
97         for (x = 0; x < vw; x++) {
98             int srcx = avgwidth * x;
99             int dstx = outwidth * x;
100 
101             int srcxlimit = srcx + avgwidth;
102             int dstxlimit = dstx + outheight;
103 
104             if (srcxlimit > srcw) {
105                 srcxlimit = srcw;
106             }
107 
108             if (dstxlimit > dstw) {
109                 dstxlimit = dstw;
110             }
111 
112             // Please note that these names are just
113             // suggestions... It's possible that alpha will be
114             // in r, for example.
115             int r = 0;
116             int g = 0;
117             int b = 0;
118             int a = 0;
119 
120             int number = 0;
121 
122             // pos always points to the start of the current line.
123             unsigned char *pos = &srcpixels[srcy * srcpitch + srcx * 4];
124 
125             /* Sum up the pixel values. */
126 
127             for (j = srcy; j < srcylimit; j++) {
128                 // po points to the current pixel.
129                 unsigned char *po = pos;
130 
131                 for (i = srcx; i < srcxlimit; i++) {
132                     r += *po++;
133                     g += *po++;
134                     b += *po++;
135                     a += *po++;
136                     number += 1;
137                 }
138 
139                 pos += srcpitch;
140             }
141 
142             /* Compute the average pixel values. */
143             r /= number;
144             g /= number;
145             b /= number;
146             a /= number;
147 
148             /* Write out the average pixel values. */
149             pos = &dstpixels[dsty * dstpitch + dstx * 4];
150             for (j = dsty; j < dstylimit; j++) {
151                 unsigned char *po = pos;
152 
153                 for (i = dstx; i < dstxlimit; i++) {
154                     *po++ = r;
155                     *po++ = g;
156                     *po++ = b;
157                     *po++ = a;
158                 }
159 
160                 pos += dstpitch;
161             }
162         }
163     }
164 
165     Py_END_ALLOW_THREADS
166 
167 }
168 
169 /* This pixellates a 32-bit RGBA pygame surface to a destination
170  * surface of a given size.
171  *
172  * pysrc - The source pygame surface, which must be 32-bit RGBA.
173  * pydst - The destination pygame surface, which should be 32-bit
174  * RGBA, and locked.
175  * avgwidth - The width of the pixels that will be averaged together.
176  * avgheight - The height of the pixels that will be averaged
177  * together.
178  * outwidth - The width of pixels that will be written to the output.
179  * outheight - The height of pixels that will be written to the
180  * output.
181  *
182  * We assume that pysrc and pydst have been locked before we are called.
183  */
pixellate24_core(PyObject * pysrc,PyObject * pydst,int avgwidth,int avgheight,int outwidth,int outheight)184 void pixellate24_core(PyObject *pysrc,
185                       PyObject *pydst,
186                       int avgwidth,
187                       int avgheight,
188                       int outwidth,
189                       int outheight
190     ) {
191 
192     SDL_Surface *src;
193     SDL_Surface *dst;
194 
195     int x, y, i, j;
196     Uint32 srcpitch, dstpitch;
197     Uint32 srcw, srch;
198     Uint32 dstw, dsth;
199     int vw, vh;
200 
201     unsigned char *srcpixels;
202     unsigned char *dstpixels;
203 
204     src = PySurface_AsSurface(pysrc);
205     dst = PySurface_AsSurface(pydst);
206 
207     Py_BEGIN_ALLOW_THREADS
208 
209     srcpixels = (unsigned char *) src->pixels;
210     dstpixels = (unsigned char *) dst->pixels;
211     srcpitch = src->pitch;
212     dstpitch = dst->pitch;
213     srcw = src->w;
214     dstw = dst->w;
215     srch = src->h;
216     dsth = dst->h;
217 
218     /* Compute the virtual width and height. */
219     vw = ( srcw + avgwidth - 1) / avgwidth;
220     vh = ( srch + avgheight - 1) / avgheight;
221 
222     /* Iterate through each of the virtual pixels. */
223 
224     for (y = 0; y < vh; y++) {
225         int srcy = avgheight * y;
226         int dsty = outheight * y;
227 
228         int srcylimit = srcy + avgheight;
229         int dstylimit = dsty + outheight;
230 
231         if (srcylimit > srch) {
232             srcylimit = srch;
233         }
234 
235         if (dstylimit > dsth) {
236             dstylimit = dsth;
237         }
238 
239         for (x = 0; x < vw; x++) {
240             int srcx = avgwidth * x;
241             int dstx = outwidth * x;
242 
243             int srcxlimit = srcx + avgwidth;
244             int dstxlimit = dstx + outheight;
245 
246             if (srcxlimit > srcw) {
247                 srcxlimit = srcw;
248             }
249 
250             if (dstxlimit > dstw) {
251                 dstxlimit = dstw;
252             }
253 
254             // Please note that these names are just
255             // suggestions... It's possible that blue will be
256             // in r, for example.
257             int r = 0;
258             int g = 0;
259             int b = 0;
260 
261             int number = 0;
262 
263             // pos always points to the start of the current line.
264             unsigned char *pos = &srcpixels[srcy * srcpitch + srcx * 3];
265 
266             /* Sum up the pixel values. */
267 
268             for (j = srcy; j < srcylimit; j++) {
269                 // po points to the current pixel.
270                 unsigned char *po = pos;
271 
272                 for (i = srcx; i < srcxlimit; i++) {
273                     r += *po++;
274                     g += *po++;
275                     b += *po++;
276                     number += 1;
277                 }
278 
279                 pos += srcpitch;
280             }
281 
282             /* Compute the average pixel values. */
283             r /= number;
284             g /= number;
285             b /= number;
286 
287             /* Write out the average pixel values. */
288             pos = &dstpixels[dsty * dstpitch + dstx * 3];
289             for (j = dsty; j < dstylimit; j++) {
290                 unsigned char *po = pos;
291 
292                 for (i = dstx; i < dstxlimit; i++) {
293                     *po++ = r;
294                     *po++ = g;
295                     *po++ = b;
296                 }
297 
298                 pos += dstpitch;
299             }
300         }
301     }
302 
303     Py_END_ALLOW_THREADS
304 
305 }
306 
307 /*
308  * This expects pysrc and pydst to be surfaces of the same size. It
309  * the source surface to the destination surface, using the r, g, b,
310  * and a maps. These maps are expected to be 256 bytes long, with each
311  * byte corresponding to a possible value of a channel in pysrc,
312  * giving what that value is mapped to in pydst.
313  */
map32_core(PyObject * pysrc,PyObject * pydst,char * rmap,char * gmap,char * bmap,char * amap)314 void map32_core(PyObject *pysrc,
315                 PyObject *pydst,
316                 char *rmap,
317                 char *gmap,
318                 char *bmap,
319                 char *amap) {
320 
321     SDL_Surface *src;
322     SDL_Surface *dst;
323 
324     int x, y;
325     Uint32 srcpitch, dstpitch;
326     Uint32 srcw, srch;
327     Uint32 dstw, dsth;
328 
329     char *srcpixels;
330     char *dstpixels;
331 
332     char *srcrow;
333     char *dstrow;
334     char *srcp;
335     char *dstp;
336 
337     src = PySurface_AsSurface(pysrc);
338     dst = PySurface_AsSurface(pydst);
339 
340     Py_BEGIN_ALLOW_THREADS
341 
342     srcpixels = (char *) src->pixels;
343     dstpixels = (char *) dst->pixels;
344     srcpitch = src->pitch;
345     dstpitch = dst->pitch;
346     srcw = src->w;
347     dstw = dst->w;
348     srch = src->h;
349     dsth = dst->h;
350 
351     srcrow = srcpixels;
352     dstrow = dstpixels;
353 
354     for (y = 0; y < srch; y++) {
355         srcp = srcrow;
356         dstp = dstrow;
357 
358 
359         for (x = 0; x < srcw; x++) {
360             *dstp++ = rmap[(unsigned char) *srcp++];
361             *dstp++ = gmap[(unsigned char) *srcp++];
362             *dstp++ = bmap[(unsigned char) *srcp++];
363             *dstp++ = amap[(unsigned char) *srcp++];
364         }
365 
366         srcrow += srcpitch;
367         dstrow += dstpitch;
368     }
369 
370     Py_END_ALLOW_THREADS
371 }
372 
/*
 * Three-byte-per-pixel counterpart of map32_core: maps the source
 * surface onto the destination surface through three lookup tables.
 *
 * pysrc and pydst are expected to be surfaces of the same size; the
 * source's width and height drive the loops. rmap, gmap and bmap are
 * each indexed with a source byte (so 256 entries are needed) and are
 * applied to bytes 0, 1 and 2 of every pixel respectively.
 */
void map24_core(PyObject *pysrc,
                PyObject *pydst,
                char *rmap,
                char *gmap,
                char *bmap) {

    SDL_Surface *in = PySurface_AsSurface(pysrc);
    SDL_Surface *out = PySurface_AsSurface(pydst);

    Py_BEGIN_ALLOW_THREADS

    const Uint32 in_stride = in->pitch;
    const Uint32 out_stride = out->pitch;
    const Uint32 width = in->w;
    const Uint32 height = in->h;

    const unsigned char *in_line = (const unsigned char *) in->pixels;
    char *out_line = (char *) out->pixels;

    Uint32 row, col;

    for (row = 0; row < height; row++) {
        const unsigned char *from = in_line;
        char *to = out_line;

        /* One pixel per step: look each byte up in its own table. */
        for (col = 0; col < width; col++) {
            to[0] = rmap[from[0]];
            to[1] = gmap[from[1]];
            to[2] = bmap[from[2]];

            from += 3;
            to += 3;
        }

        in_line += in_stride;
        out_line += out_stride;
    }

    Py_END_ALLOW_THREADS
}
430 
431 /*
432  * This expects pysrc and pydst to be surfaces of the same size. It
433  * the source surface to the destination surface, using the r, g, b,
434  * and a maps. These maps are expected to be 256 bytes long, with each
435  * byte corresponding to a possible value of a channel in pysrc,
436  * giving what that value is mapped to in pydst.
437  */
linmap32_core(PyObject * pysrc,PyObject * pydst,int rmul,int gmul,int bmul,int amul)438 void linmap32_core(PyObject *pysrc,
439                 PyObject *pydst,
440                 int rmul,
441                 int gmul,
442                 int bmul,
443                 int amul) {
444 
445     SDL_Surface *src;
446     SDL_Surface *dst;
447 
448     int x, y;
449     Uint32 srcpitch, dstpitch;
450     Uint32 srcw, srch;
451     Uint32 dstw, dsth;
452 
453     char *srcpixels;
454     char *dstpixels;
455 
456     char *srcrow;
457     char *dstrow;
458     char *srcp;
459     char *dstp;
460 
461     src = PySurface_AsSurface(pysrc);
462     dst = PySurface_AsSurface(pydst);
463 
464     Py_BEGIN_ALLOW_THREADS
465 
466     srcpixels = (char *) src->pixels;
467     dstpixels = (char *) dst->pixels;
468     srcpitch = src->pitch;
469     dstpitch = dst->pitch;
470     srcw = src->w;
471     dstw = dst->w;
472     srch = src->h;
473     dsth = dst->h;
474 
475     srcrow = srcpixels;
476     dstrow = dstpixels;
477 
478     for (y = 0; y < srch; y++) {
479         srcp = srcrow;
480         dstp = dstrow;
481 
482 
483         for (x = 0; x < srcw; x++) {
484             *dstp++ = ((unsigned char) *srcp++) * rmul >> 8;
485             *dstp++ = ((unsigned char) *srcp++) * gmul >> 8;
486             *dstp++ = ((unsigned char) *srcp++) * bmul >> 8;
487             *dstp++ = ((unsigned char) *srcp++) * amul >> 8;
488         }
489 
490         srcrow += srcpitch;
491         dstrow += dstpitch;
492     }
493 
494     Py_END_ALLOW_THREADS
495 }
496 
/*
 * Three-byte-per-pixel counterpart of linmap32_core: scales every byte
 * of the source by a per-channel fixed-point multiplier.
 *
 * pysrc and pydst are expected to be surfaces of the same size; the
 * source's width and height drive the loops. Bytes 0, 1 and 2 of each
 * pixel are multiplied by rmul, gmul and bmul respectively and the
 * product is shifted right by 8 bits (256 means "unchanged"). The
 * result is stored into a single byte without clamping.
 */
void linmap24_core(PyObject *pysrc,
                PyObject *pydst,
                int rmul,
                int gmul,
                int bmul) {

    SDL_Surface *in = PySurface_AsSurface(pysrc);
    SDL_Surface *out = PySurface_AsSurface(pydst);

    Py_BEGIN_ALLOW_THREADS

    const Uint32 in_stride = in->pitch;
    const Uint32 out_stride = out->pitch;
    const Uint32 width = in->w;
    const Uint32 height = in->h;

    const unsigned char *in_line = (const unsigned char *) in->pixels;
    char *out_line = (char *) out->pixels;

    Uint32 row, col;

    for (row = 0; row < height; row++) {
        const unsigned char *from = in_line;
        char *to = out_line;

        for (col = 0; col < width; col++) {
            /* Multiply first, then drop the low 8 fraction bits. */
            to[0] = (from[0] * rmul) >> 8;
            to[1] = (from[1] * gmul) >> 8;
            to[2] = (from[2] * bmul) >> 8;

            from += 3;
            to += 3;
        }

        in_line += in_stride;
        out_line += out_stride;
    }

    Py_END_ALLOW_THREADS
}
554 
555 /*
556  * Helper function to describe averaging filters (AFs) needed to
557  * approximate a specific Gaussian. Takes a desired standard deviation
558  * and number of passes and produces lower and upper AF widths and the
559  * number of passes to perform with the lower AF width.
560  * ref: Peter Kovesi, "Fast Almost-Gaussian Filtering", 2010
561  *      section II; equations 3 and 5
562  *      https://www.peterkovesi.com/papers/FastGaussianSmoothing.pdf
563  */
blur_filters(float sigma,int n,int * wl,int * wu,int * m)564 void blur_filters(float sigma, int n, int *wl, int *wu, int *m) {
565     *wl = (int) floor(sqrt(12 * sigma * sigma / n + 1));
566     if (*wl % 2 == 0) (*wl)--;
567     *wu = *wl + 2;
568     *m = (int) round(
569         (12 * sigma * sigma - n * *wl * *wl - 4 * n * *wl - 3 * n)
570         / (-4 * *wl - 4)
571     );
572 }
573 
574 /*
575  * This expects pysrc, pywrk and pydst to be surfaces of the same size.
576  * It approximates a Gaussian blur using several box blurs. Box sizes
577  * are AF widths as described by blur_filters. Box blurs are performed
578  * using two passes of a one-dimensional blur, on the x and y axes
579  * respectively. The pywrk surface is used to hold intermediate results
580  * only and should not be treated as valid output.
581  * ref: Ivan Kutskir, "Fastest Gaussian Blur (in linear time)", 2013
582  *      http://blog.ivank.net/fastest-gaussian-blur.html
583  */
blur32_core(PyObject * pysrc,PyObject * pywrk,PyObject * pydst,float xrad,float yrad)584 void blur32_core(PyObject *pysrc,
585                  PyObject *pywrk,
586                  PyObject *pydst,
587                  float xrad,
588                  float yrad) {
589 
590     int n = 3; // number of passes, no more than six
591 
592     int xl, xu, xm;
593     int yl, yu, ym;
594 
595     blur_filters(xrad, n, &xl, &xu, &xm);
596 
597     if (xrad != yrad) {
598         blur_filters(yrad, n, &yl, &yu, &ym);
599     } else {
600         yl = xl; yu = xu; ym = xm;
601     }
602 
603     for (int i = 0; i < n; i++) {
604         int xr = i < xm ? xl : xu;
605         linblur32_core(pysrc, pywrk, xr, 0);
606         int yr = i < ym ? yl : yu;
607         linblur32_core(pywrk, pydst, yr, 1);
608         pysrc = pydst;
609     }
610 }
611 
/*
 * 24-bit counterpart of blur32_core: approximates a Gaussian blur with
 * repeated box blurs performed by linblur24_core.
 *
 * pysrc, pywrk and pydst are expected to be surfaces of the same size.
 * xrad and yrad are handed to blur_filters as the standard deviation
 * for the x and y axes. Every pass runs a horizontal blur into pywrk
 * followed by a vertical blur from pywrk into pydst; after the first
 * pass pydst is also the input. pywrk only holds intermediate results.
 */
void blur24_core(PyObject *pysrc,
                 PyObject *pywrk,
                 PyObject *pydst,
                 float xrad,
                 float yrad) {

    const int passes = 3; // number of passes, no more than six

    int x_lower, x_upper, x_count;
    int y_lower, y_upper, y_count;
    int pass;

    PyObject *input = pysrc;

    blur_filters(xrad, passes, &x_lower, &x_upper, &x_count);

    if (xrad == yrad) {
        /* Same deviation on both axes: reuse the x filters. */
        y_lower = x_lower;
        y_upper = x_upper;
        y_count = x_count;
    } else {
        blur_filters(yrad, passes, &y_lower, &y_upper, &y_count);
    }

    for (pass = 0; pass < passes; pass++) {
        /* The first *_count passes use the lower width, the rest the upper. */
        int x_size = (pass < x_count) ? x_lower : x_upper;
        int y_size = (pass < y_count) ? y_lower : y_upper;

        linblur24_core(input, pywrk, x_size, 0);
        linblur24_core(pywrk, pydst, y_size, 1);

        input = pydst;
    }
}
639 
640 /*
641  * This expects pysrc and pydst to be surfaces of the same size. It
642  * implements a linear time one-dimensional blur using accumulators,
643  * with a sample size of twice the radius plus one. It can operate in
644  * both the x and y axes.
645  */
linblur32_core(PyObject * pysrc,PyObject * pydst,int radius,int vertical)646 void linblur32_core(PyObject *pysrc,
647                     PyObject *pydst,
648                     int radius,
649                     int vertical) {
650 
651     int c, r;
652 
653     SDL_Surface *src;
654     SDL_Surface *dst;
655 
656     Uint32 rows, cols;
657     Uint32 incr, skip;
658 
659     unsigned char *srcpixels;
660     unsigned char *dstpixels;
661 
662     unsigned char *dstp;
663 
664     src = PySurface_AsSurface(pysrc);
665     dst = PySurface_AsSurface(pydst);
666 
667     Py_BEGIN_ALLOW_THREADS
668 
669     srcpixels = (unsigned char *) src->pixels;
670     dstpixels = (unsigned char *) dst->pixels;
671 
672     if (vertical) {
673         rows = dst->w;
674         skip = 4;
675         incr = dst->pitch - 4;
676         cols = dst->h;
677     } else {
678         rows = dst->h;
679         skip = dst->pitch;
680         incr = 0;
681         cols = dst->w;
682     }
683 
684     int divisor = radius * 2 + 1;
685 
686     for (r = 0; r < rows; r++) {
687         // The values of the pixels on the left and right ends of the
688         // line.
689         unsigned char lr, lg, lb, la;
690         unsigned char rr, rg, rb, ra;
691 
692         unsigned char *leader = srcpixels + r * skip;
693         unsigned char *trailer = leader;
694         dstp = dstpixels + r * skip;
695 
696         lr = *leader;
697         lg = *(leader + 1);
698         lb = *(leader + 2);
699         la = *(leader + 3);
700 
701         int sumr = lr * radius;
702         int sumg = lg * radius;
703         int sumb = lb * radius;
704         int suma = la * radius;
705 
706         for (c = 0; c < radius; c++) {
707             sumr += *leader++;
708             sumg += *leader++;
709             sumb += *leader++;
710             suma += *leader++;
711             leader += incr;
712         }
713 
714         // left side of the kernel is off of the screen.
715         for (c = 0; c < radius; c++) {
716             sumr += *leader++;
717             sumg += *leader++;
718             sumb += *leader++;
719             suma += *leader++;
720             leader += incr;
721 
722             *dstp++ = sumr / divisor;
723             *dstp++ = sumg / divisor;
724             *dstp++ = sumb / divisor;
725             *dstp++ = suma / divisor;
726             dstp += incr;
727 
728             sumr -= lr;
729             sumg -= lg;
730             sumb -= lb;
731             suma -= la;
732         }
733 
734         int end = cols - radius - 1;
735 
736         // The kernel is fully on the screen.
737         for (; c < end; c++) {
738             sumr += *leader++;
739             sumg += *leader++;
740             sumb += *leader++;
741             suma += *leader++;
742             leader += incr;
743 
744             *dstp++ = sumr / divisor;
745             *dstp++ = sumg / divisor;
746             *dstp++ = sumb / divisor;
747             *dstp++ = suma / divisor;
748             dstp += incr;
749 
750             sumr -= *trailer++;
751             sumg -= *trailer++;
752             sumb -= *trailer++;
753             suma -= *trailer++;
754             trailer += incr;
755         }
756 
757         rr = *leader++;
758         rg = *leader++;
759         rb = *leader++;
760         ra = *leader++;
761 
762         // The kernel is off the right side of the screen.
763         for (; c < cols; c++) {
764             sumr += rr;
765             sumg += rg;
766             sumb += rb;
767             suma += ra;
768 
769             *dstp++ = sumr / divisor;
770             *dstp++ = sumg / divisor;
771             *dstp++ = sumb / divisor;
772             *dstp++ = suma / divisor;
773             dstp += incr;
774 
775             sumr -= *trailer++;
776             sumg -= *trailer++;
777             sumb -= *trailer++;
778             suma -= *trailer++;
779             trailer += incr;
780         }
781     }
782 
783     Py_END_ALLOW_THREADS
784 }
785 
/*
 * One-dimensional box blur of a 3-byte-per-pixel surface.
 *
 * Each output pixel is the average of the 2 * radius + 1 source pixels
 * centred on it along the chosen axis. Positions that fall off either
 * end of the line use the value of the nearest edge pixel. A running
 * sum per channel keeps the cost linear in the line length.
 *
 * pysrc    - The source surface.
 * pydst    - The destination surface; its width and height drive the
 *            loops.
 * radius   - Number of pixels on each side of the centre. Negative
 *            values are treated as 0 (a plain copy).
 * vertical - Non-zero to blur along the y axis, zero for the x axis.
 *
 * Lines shorter than the kernel are handled: reads are clamped to the
 * line, so a large radius on a small surface never touches memory
 * outside it.
 *
 * NOTE(review): assumes pysrc and pydst are distinct surfaces of the
 * same size and format; this is not checked here.
 */
void linblur24_core(PyObject *pysrc,
                    PyObject *pydst,
                    int radius,
                    int vertical) {

    enum { BPP = 3 }; /* bytes per pixel */

    int c, r, k;

    SDL_Surface *src;
    SDL_Surface *dst;

    int lines, length;
    size_t src_line, src_step;
    size_t dst_line, dst_step;

    unsigned char *srcpixels;
    unsigned char *dstpixels;

    src = PySurface_AsSurface(pysrc);
    dst = PySurface_AsSurface(pydst);

    /* A negative radius would give a non-positive divisor. */
    if (radius < 0) {
        radius = 0;
    }

    Py_BEGIN_ALLOW_THREADS

    srcpixels = (unsigned char *) src->pixels;
    dstpixels = (unsigned char *) dst->pixels;

    /* *_line is the distance between the starts of successive lines,
     * *_step the distance between successive pixels within a line. */
    if (vertical) {
        lines = dst->w;
        length = dst->h;
        src_line = BPP;
        src_step = src->pitch;
        dst_line = BPP;
        dst_step = dst->pitch;
    } else {
        lines = dst->h;
        length = dst->w;
        src_line = src->pitch;
        src_step = BPP;
        dst_line = dst->pitch;
        dst_step = BPP;
    }

    int divisor = radius * 2 + 1;

    /* Nothing to read or write when the lines are empty. */
    for (r = 0; length > 0 && r < lines; r++) {
        const unsigned char *sline = srcpixels + r * src_line;
        unsigned char *dline = dstpixels + r * dst_line;
        const unsigned char *last = sline + (length - 1) * src_step;

        // The values of the pixels on the low and high ends of the line.
        unsigned char lo[BPP];
        unsigned char hi[BPP];
        int sum[BPP];

        /* Highest real pixel index covered by the kernel centred on 0. */
        int inside = radius < length - 1 ? radius : length - 1;

        for (k = 0; k < BPP; k++) {
            lo[k] = sline[k];
            hi[k] = last[k];

            /* Kernel centred on pixel 0: radius copies of the low edge
             * for the off-line positions, plus copies of the high edge
             * if the kernel already runs past the far end. */
            sum[k] = lo[k] * radius + hi[k] * (radius - inside);
        }

        for (c = 0; c <= inside; c++) {
            const unsigned char *p = sline + c * src_step;

            for (k = 0; k < BPP; k++) {
                sum[k] += p[k];
            }
        }

        for (c = 0; c < length; c++) {
            unsigned char *out = dline + c * dst_step;
            const unsigned char *enter;
            const unsigned char *leave;

            for (k = 0; k < BPP; k++) {
                out[k] = sum[k] / divisor;
            }

            /* Slide the kernel one pixel: the pixel entering on the
             * high side and the one leaving on the low side, each
             * clamped to the line. */
            if (c < length - 1 - radius) {
                enter = sline + (c + radius + 1) * src_step;
            } else {
                enter = hi;
            }

            if (c > radius) {
                leave = sline + (c - radius) * src_step;
            } else {
                leave = lo;
            }

            for (k = 0; k < BPP; k++) {
                sum[k] += enter[k] - leave[k];
            }
        }
    }

    Py_END_ALLOW_THREADS
}
912 
913 // Alpha Munge takes a channel from the source pixel, maps it, and
914 // sticks it into the alpha channel of the destination, overwriting
915 // the destination's alpha channel.
916 //
917 // It's used to implement SmartDissolve.
918 
alphamunge_core(PyObject * pysrc,PyObject * pydst,int src_bypp,int src_aoff,int dst_aoff,char * amap)919 void alphamunge_core(PyObject *pysrc,
920                      PyObject *pydst,
921                      int src_bypp, // bytes per pixel.
922                      int src_aoff, // alpha offset.
923                      int dst_aoff, // alpha offset.
924                      char *amap) {
925 
926     int x, y;
927 
928     SDL_Surface *src;
929     SDL_Surface *dst;
930 
931     Uint32 srcpitch, dstpitch;
932     Uint32 srcw, srch;
933     Uint32 dstw, dsth;
934 
935     unsigned char *srcpixels;
936     unsigned char *dstpixels;
937 
938     unsigned char *srcline;
939     unsigned char *dstline;
940 
941     unsigned char *srcp;
942     unsigned char *dstp;
943 
944 
945     src = PySurface_AsSurface(pysrc);
946     dst = PySurface_AsSurface(pydst);
947 
948     Py_BEGIN_ALLOW_THREADS
949 
950     srcpixels = (unsigned char *) src->pixels;
951     dstpixels = (unsigned char *) dst->pixels;
952     srcpitch = src->pitch;
953     dstpitch = dst->pitch;
954     srcw = src->w;
955     dstw = dst->w;
956     srch = src->h;
957     dsth = dst->h;
958 
959 
960     // We assume that src is bigger than dst, and so use dst
961     // to handle everything.
962 
963     srcline = srcpixels;
964     dstline = dstpixels;
965 
966     for (y = 0; y < dsth; y++) {
967 
968         srcp = srcline + src_aoff;
969         dstp = dstline + dst_aoff;
970 
971         for (x = 0; x < dstw; x++) {
972 
973             *dstp = amap[*srcp];
974             srcp += src_bypp;
975             dstp += 4; // Need an alpha channel.
976         }
977 
978         srcline += srcpitch;
979         dstline += dstpitch;
980 
981     }
982 
983     Py_END_ALLOW_THREADS
984 }
985 
/*
 * Scales a region of a 4-byte-per-pixel source surface onto the
 * destination using bilinear interpolation in 8.8 fixed point.
 *
 * source_xoff/source_yoff     - Offset of the region within the source.
 * source_width/source_height  - Size of the source region.
 * dest_xoff/dest_yoff         - Offset applied to destination
 *                               coordinates when computing the matching
 *                               source position.
 * dest_width/dest_height      - Size the region is scaled to.
 * precise - When non-zero the step maps the first and last destination
 *           pixels onto the first and last source pixels (step of 0
 *           when the destination size is 1 or less). Otherwise a
 *           255-based step over the full destination size is used.
 *
 * Every pixel of the destination surface is written. Each one blends
 * the source pixel at the computed position with its right-hand
 * neighbour and the two pixels on the line below, so those reads must
 * be valid for the positions the parameters produce.
 */
void scale32_core(PyObject *pysrc, PyObject *pydst,
                  float source_xoff, float source_yoff,
                  float source_width, float source_height,
                  float dest_xoff, float dest_yoff,
                  float dest_width, float dest_height,
                  int precise
    ) {

    SDL_Surface *in = PySurface_AsSurface(pysrc);
    SDL_Surface *out = PySurface_AsSurface(pydst);

    Py_BEGIN_ALLOW_THREADS

    unsigned char *in_base = (unsigned char *) in->pixels;
    unsigned char *out_base = (unsigned char *) out->pixels;
    const Uint32 in_pitch = in->pitch;
    const Uint32 out_pitch = out->pitch;
    const Uint32 out_w = out->w;
    const Uint32 out_h = out->h;

    /* Source distance, in 1/256ths of a pixel, per destination pixel. */
    float xdelta, ydelta;

    int row;
    Uint32 col;
    int ch;

    if (!precise) {
        xdelta = 255.0 * (source_width - 1) / dest_width;
        ydelta = 255.0 * (source_height - 1) / dest_height;
    } else {
        if (dest_width > 1) {
            xdelta = 256.0 * (source_width - 1) / (dest_width - 1);
        } else {
            xdelta = 0;
        }

        if (dest_height > 1) {
            ydelta = 256.0 * (source_height - 1) / (dest_height - 1);
        } else {
            ydelta = 0;
        }
    }

    for (row = 0; row < out_h; row++) {

        unsigned char *d = out_base + out_pitch * row;

        /* Vertical source position: whole line in the high bits,
         * blend weight for the line below in the low 8 bits. */
        int sline = source_yoff * 256 + (row + dest_yoff) * ydelta;
        short below_w = (int) sline & 255;
        short above_w = 256 - below_w;

        unsigned char *line_above = in_base + (sline >> 8) * in_pitch;

        float scol = source_xoff * 256 + dest_xoff * xdelta;

        for (col = 0; col < out_w; col++) {

            /* Horizontal source position, split the same way. */
            short left_w = 256 - ((int) scol & 255);
            short right_w = 256 - left_w;

            unsigned char *above = line_above + ((int) scol >> 8) * 4; // bpp
            unsigned char *below = above + in_pitch;

            unsigned short mixed[4];

            /* Blend vertically first, then horizontally. Bytes ch and
             * ch + 4 are the same channel of the left and right pixels. */
            for (ch = 0; ch < 4; ch++) {
                unsigned short v;

                v = (((above[ch] * above_w) + (below[ch] * below_w)) >> 8) * left_w;
                v += (((above[ch + 4] * above_w) + (below[ch + 4] * below_w)) >> 8) * right_w;

                mixed[ch] = v;
            }

            /* All reads for this pixel are done before anything is stored. */
            for (ch = 0; ch < 4; ch++) {
                d[ch] = mixed[ch] >> 8;
            }

            d += 4;
            scol += xdelta;
        }
    }

    Py_END_ALLOW_THREADS
}
1099 
1100 
/* Bilinearly scales a 24-bit surface onto another 24-bit surface.
 *
 * pysrc - The source pygame surface, read as 3 bytes per pixel.
 * pydst - The destination pygame surface. Every pixel of it is written,
 * 3 bytes per pixel.
 * source_xoff, source_yoff - Offset, in source pixels, added to every
 * sampled source position.
 * source_width, source_height - Size, in source pixels, of the region
 * that is being scaled.
 * dest_xoff, dest_yoff - Offset, in destination pixels, added to the
 * destination coordinate before it is mapped back to the source.
 * dest_width, dest_height - Size, in destination pixels, that the source
 * region is scaled to.
 *
 * Source positions are tracked in fixed point with 8 fractional bits:
 * the integer part (value >> 8) selects the pixel and the low 8 bits are
 * the blend weight toward the next pixel. The source offsets are
 * therefore scaled by 256, as the 32-bit scaler in this file does.
 *
 * The GIL is released while the pixels are processed. No locking of the
 * surfaces is done here.
 */
void scale24_core(PyObject *pysrc, PyObject *pydst,
                  float source_xoff, float source_yoff,
                  float source_width, float source_height,
                  float dest_xoff, float dest_yoff,
                  float dest_width, float dest_height) {

    SDL_Surface *src;
    SDL_Surface *dst;

    int y;
    Uint32 srcpitch, dstpitch;
    Uint32 dstw, dsth;
    float xdelta, ydelta;

    unsigned char *srcpixels;
    unsigned char *dstpixels;

    src = PySurface_AsSurface(pysrc);
    dst = PySurface_AsSurface(pydst);

    Py_BEGIN_ALLOW_THREADS

    srcpixels = (unsigned char *) src->pixels;
    dstpixels = (unsigned char *) dst->pixels;
    srcpitch = src->pitch;
    dstpitch = dst->pitch;
    dstw = dst->w;
    dsth = dst->h;

    // Fixed-point source step per destination pixel. NOTE(review): the
    // factor is 255 rather than 256, presumably so that the sampled
    // position stays short of the last row/column and the "+1" neighbour
    // reads below remain inside the region - confirm before changing.
    xdelta = 255.0 * (source_width - 1) / dest_width;
    ydelta = 255.0 * (source_height - 1) / dest_height;

    for (y = 0; y < dsth; y++) {

        unsigned char *s0;
        unsigned char *d;
        unsigned char *dend;

        int sline;
        short s0frac;
        short s1frac;
        float scol;

        d = dstpixels + dstpitch * y;
        dend = d + 3 * dstw; // bpp

        // Fixed-point source row for this destination row, and the
        // weights of that row (s0frac) and the row below it (s1frac).
        sline = source_yoff * 256 + (y + dest_yoff) * ydelta;
        s1frac = (int) sline & 255;
        s0frac = 256 - s1frac;

        s0 = srcpixels + (sline >> 8) * srcpitch;

        // Fixed-point source column of the first destination pixel.
        scol = source_xoff * 256 + dest_xoff * xdelta;

        while (d < dend) {

            unsigned char *s0p;
            unsigned char *s1p;

            // Weight of the left-hand source column.
            short xfrac = 256 - ((int) scol & 255);
            unsigned short r, g, b;

            s0p = s0 + ((int) scol >> 8) * 3; // bpp
            s1p = s0p + srcpitch;

            // Left column: blend the two rows, then weight by xfrac.
            r = (((*s0p++ * s0frac) + (*s1p++ * s1frac)) >> 8) * xfrac;
            g = (((*s0p++ * s0frac) + (*s1p++ * s1frac)) >> 8) * xfrac;
            b = (((*s0p++ * s0frac) + (*s1p++ * s1frac)) >> 8) * xfrac;

            xfrac = 256 - xfrac;

            // Right column: the pointers now address the next pixel.
            r += (((*s0p++ * s0frac) + (*s1p++ * s1frac)) >> 8) * xfrac;
            g += (((*s0p++ * s0frac) + (*s1p++ * s1frac)) >> 8) * xfrac;
            b += (((*s0p++ * s0frac) + (*s1p++ * s1frac)) >> 8) * xfrac;

            *d++ = r >> 8;
            *d++ = g >> 8;
            *d++ = b >> 8;

            scol += xdelta;
        }
    }

    Py_END_ALLOW_THREADS
}
1193 
/* Interpolates between two packed channel pairs.
 *
 * a and b each hold two 8-bit channels, in bits 0-7 and bits 16-23 (the
 * layout selected by the 0xff00ff mask). mul is the weight of b in 1/256
 * units: 0 yields a, 256 yields b. Both channels are interpolated with a
 * single multiply, and the result is masked back to the same layout.
 *
 * Every argument is parenthesised so that expressions can be passed
 * safely. Note that a is still evaluated twice, so it must not have side
 * effects.
 */
#define I(a, b, mul) ((((((b) - (a)) * (mul)) >> 8) + (a)) & 0xff00ff)
1195 
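/** A small offset of 1/256. When transform32_std is not in precise
    mode it subtracts this from the maximum source coordinates, and
    moves any delta whose reciprocal has a fractional part smaller than
    this value slightly towards zero. (Original note: this appears to
    limit the expansion, such that 1/x yields a max expansion of lg x.) */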
1198 #define EPSILON (1.0 / 256.0)
1199 
1200 /****************************************************************************/
1201 /* A similar concept to rotozoom, but implemented differently, so we
1202    can limit the target area. */
transform32_std(PyObject * pysrc,PyObject * pydst,float corner_x,float corner_y,float xdx,float ydx,float xdy,float ydy,int ashift,float a,int precise)1203 int transform32_std(PyObject *pysrc, PyObject *pydst,
1204                     float corner_x, float corner_y,
1205                     float xdx, float ydx,
1206                     float xdy, float ydy,
1207                     int ashift,
1208                     float a,
1209                     int precise
1210     ) {
1211 
1212     SDL_Surface *src;
1213     SDL_Surface *dst;
1214 
1215     int y;
1216     int srcpitch, dstpitch;
1217     int srcw, srch;
1218     int dstw, dsth;
1219 
1220     // The x and y source pixel coordinates, times 65536. And their
1221     // delta-per-dest-x-pixel.
1222     int sxi = 0, syi = 0, dsxi = 0, dsyi = 0;
1223 
1224     unsigned char *srcpixels;
1225     unsigned char *dstpixels;
1226 
1227     src = PySurface_AsSurface(pysrc);
1228     dst = PySurface_AsSurface(pydst);
1229 
1230     Py_BEGIN_ALLOW_THREADS
1231 
1232     srcpixels = (unsigned char *) src->pixels;
1233     dstpixels = (unsigned char *) dst->pixels;
1234     srcpitch = src->pitch;
1235     dstpitch = dst->pitch;
1236     srcw = src->w;
1237     dstw = dst->w;
1238     srch = src->h;
1239     dsth = dst->h;
1240 
1241     // Compute the coloring multiplier.
1242     unsigned int amul = (unsigned int) (a * 256);
1243 
1244     // Compute the maximum x and y coordinates.
1245     double maxsx = srcw;
1246     double maxsy = srch;
1247 
1248     // Deal with pre-6.10.1 versions of Ren'Py, which didn't give us
1249     // that 1px border that allows us to be precise.
1250     if (! precise) {
1251         maxsx -= EPSILON;
1252         maxsy -= EPSILON;
1253 
1254         // If a delta is too even, subtract epsilon (towards 0) from it.
1255         if (xdx && fabs(fmodf(1.0 / xdx, 1)) < EPSILON) {
1256             xdx -= (xdx / fabs(xdx)) * EPSILON;
1257         }
1258         if (xdy && fabs(fmodf(1.0 / xdy, 1)) < EPSILON) {
1259             xdy -= (xdy / fabs(xdy)) * EPSILON;
1260         }
1261         if (ydx && fabs(fmodf(1.0 / ydx, 1)) < EPSILON) {
1262             ydx -= (ydx / fabs(ydx)) * EPSILON;
1263         }
1264         if (ydy && fabs(fmodf(1.0 / ydy, 1)) < EPSILON) {
1265             ydy -= (ydy / fabs(ydy)) * EPSILON;
1266         }
1267     }
1268 
1269 
1270     // Loop through every line.
1271     for (y = 0; y < dsth; y++) {
1272 
1273         // The source coordinates of the leftmost pixel in the line.
1274         double leftsx = corner_x + y * xdy;
1275         double leftsy = corner_y + y * ydy;
1276 
1277         // Min and max x-extent to draw on the current line.
1278         double minx = 0;
1279         double maxx = dstw - 1;
1280 
1281         // Figure out the x-extent based on xdx.
1282         if (xdx) {
1283             double x1 = (0.0 - leftsx) / xdx;
1284             double x2 = (maxsx - leftsx) / xdx;
1285 
1286             if (x1 < x2) {
1287                 minx = fmax(x1, minx);
1288                 maxx = fmin(x2, maxx);
1289             } else {
1290                 minx = fmax(x2, minx);
1291                 maxx = fmin(x1, maxx);
1292             }
1293 
1294         } else {
1295             if (leftsx < 0 || leftsx > maxsx) {
1296                continue;
1297             }
1298         }
1299 
1300         // Figure out the x-extent based on ydx.
1301         if (ydx) {
1302             double x1 = (0.0 - leftsy) / ydx;
1303             double x2 = (maxsy - leftsy) / ydx;
1304 
1305             if (x1 < x2) {
1306                 minx = fmax(x1, minx);
1307                 maxx = fmin(x2, maxx);
1308             } else {
1309                 minx = fmax(x2, minx);
1310                 maxx = fmin(x1, maxx);
1311             }
1312 
1313         } else {
1314             if (leftsy < 0 || leftsy > maxsy) {
1315                 continue;
1316             }
1317         }
1318 
1319         minx = ceil(minx);
1320         maxx = floor(maxx);
1321 
1322         if (minx >= maxx) {
1323             continue;
1324         }
1325 
1326         // The start and end of line pointers.
1327         unsigned char *d = dstpixels + dstpitch * y;
1328         unsigned char *dend = d + 4 * (int) maxx;
1329 
1330         // Advance start of line by 4.
1331         d += 4 * (int) minx;
1332 
1333         // Starting coordinates and deltas.
1334         sxi = (int) ((leftsx + minx * xdx) * 65536);
1335         syi = (int) ((leftsy + minx * ydx) * 65536);
1336         dsxi = (int) (xdx * 65536);
1337         dsyi = (int) (ydx * 65536);
1338 
1339         while (d <= dend) {
1340 
1341             int px = sxi >> 16;
1342             int py = syi >> 16;
1343 
1344             unsigned char *sp = srcpixels + py * srcpitch + px * 4;
1345 
1346             unsigned int yfrac = (syi >> 8) & 0xff; // ((short) sy) & 0xff;
1347             unsigned int xfrac = (sxi >> 8) & 0xff; // ((short) sx) & 0xff;
1348 
1349             unsigned int pal = *(unsigned int *) sp;
1350             unsigned int pbl = *(unsigned int *) (sp + 4);
1351             sp += srcpitch;
1352             unsigned int pcl = *(unsigned int *) sp;
1353             unsigned int pdl = *(unsigned int *) (sp + 4);
1354 
1355             unsigned int pah = (pal >> 8) & 0xff00ff;
1356             unsigned int pbh = (pbl >> 8) & 0xff00ff;
1357             unsigned int pch = (pcl >> 8) & 0xff00ff;
1358             unsigned int pdh = (pdl >> 8) & 0xff00ff;
1359 
1360             pal &= 0xff00ff;
1361             pbl &= 0xff00ff;
1362             pcl &= 0xff00ff;
1363             pdl &= 0xff00ff;
1364 
1365             unsigned int rh = I(I(pah, pch, yfrac), I(pbh, pdh, yfrac), xfrac);
1366             unsigned int rl = I(I(pal, pcl, yfrac), I(pbl, pdl, yfrac), xfrac);
1367 
1368             unsigned int alpha = (((rh << 8) | rl) >> ashift) & 0xff;
1369             alpha = (alpha * amul) >> 8;
1370 
1371             unsigned int dl = * (unsigned int *) d;
1372             unsigned int dh = (dl >> 8) & 0xff00ff;
1373             dl &= 0xff00ff;
1374 
1375             dl = I(dl, rl, alpha);
1376             dh = I(dh, rh, alpha);
1377 
1378             * (unsigned int *) d = (dh << 8) | dl;
1379 
1380             d += 4;
1381             sxi += dsxi;
1382             syi += dsyi;
1383         }
1384 
1385     }
1386 
1387     Py_END_ALLOW_THREADS;
1388 
1389 
1390     // This is bogus, and only serves to ensure that the FPU
1391     // computes these variables at the right times.
1392     return sxi + syi + dsxi + dsyi;
1393 }
1394 
1395 
1396 
1397 
/* Entry point for the 32-bit transform. Passes every argument through
 * to transform32_std unchanged, and discards its return value.
 */
void transform32_core(PyObject *pysrc, PyObject *pydst,
                      float corner_x, float corner_y,
                      float xdx, float ydx,
                      float xdy, float ydy,
                      int ashift,
                      float a,
                      int precise
    ) {

    (void) transform32_std(pysrc, pydst,
                           corner_x, corner_y,
                           xdx, ydx,
                           xdy, ydy,
                           ashift, a, precise);
}
1412 
1413 
1414 
1415 
/* Blends two 32-bit surfaces into a third, using one weight for every
 * pixel.
 *
 * pysrca, pysrcb - The two source pygame surfaces, read as 4 bytes per
 * pixel.
 * pydst - The destination pygame surface. Its width and height decide
 * how many pixels are processed, so both sources must be at least that
 * large.
 * alpha - The weight of pysrcb, in 1/256 units. 0 yields pysrca, and
 * larger values move each channel towards pysrcb.
 *
 * The surface dimensions are kept in ints, as SDL stores them, so that
 * surfaces larger than 65535 pixels in a dimension are not truncated.
 *
 * The GIL is released while the pixels are processed.
 */
void blend32_core_std(PyObject *pysrca, PyObject *pysrcb, PyObject *pydst,
                      int alpha) {

    SDL_Surface *srca;
    SDL_Surface *srcb;
    SDL_Surface *dst;

    int srcapitch, srcbpitch, dstpitch;
    int dstw, dsth;
    int y;

    unsigned char *srcapixels;
    unsigned char *srcbpixels;
    unsigned char *dstpixels;

    srca = PySurface_AsSurface(pysrca);
    srcb = PySurface_AsSurface(pysrcb);
    dst = PySurface_AsSurface(pydst);

    Py_BEGIN_ALLOW_THREADS

    srcapixels = (unsigned char *) srca->pixels;
    srcbpixels = (unsigned char *) srcb->pixels;
    dstpixels = (unsigned char *) dst->pixels;
    srcapitch = srca->pitch;
    srcbpitch = srcb->pitch;
    dstpitch = dst->pitch;
    dstw = dst->w;
    dsth = dst->h;

    for (y = 0; y < dsth; y++) {

        unsigned int *dp = (unsigned int *)(dstpixels + dstpitch * y);
        unsigned int *dpe = dp + dstw;

        unsigned int *sap = (unsigned int *)(srcapixels + srcapitch * y);
        unsigned int *sbp = (unsigned int *)(srcbpixels + srcbpitch * y);

        while (dp < dpe) {
            unsigned int sal = *sap++;
            unsigned int sbl = *sbp++;

            // Split each pixel into two channel pairs, so that two
            // channels are interpolated per multiply.
            unsigned int sah = (sal >> 8) & 0xff00ff;
            unsigned int sbh = (sbl >> 8) & 0xff00ff;

            sal &= 0xff00ff;
            sbl &= 0xff00ff;

            *dp++ = I(sal, sbl, alpha) | (I(sah, sbh, alpha) << 8);
        }
    }

    Py_END_ALLOW_THREADS

}
1471 
/* Entry point for the uniform 32-bit blend. Passes every argument
 * through to blend32_core_std unchanged.
 */
void blend32_core(PyObject *pysrca, PyObject *pysrcb, PyObject *pydst,
                  int alpha) {

    blend32_core_std(pysrca,
                     pysrcb,
                     pydst,
                     alpha);
}
1477 
1478 
/* Blends two 32-bit surfaces into a third, taking the weight of each
 * pixel from a control image.
 *
 * pysrca, pysrcb - The two source pygame surfaces, read as 4 bytes per
 * pixel.
 * pydst - The destination pygame surface. Its width and height decide
 * how many pixels are processed, so the sources and the control image
 * must be at least that large.
 * pyimg - The control image, read as 4 bytes per pixel.
 * alpha_off - Byte offset, within each control pixel, of the byte that
 * is used as the control value.
 * amap - Table mapping a control value to the weight of pysrcb. It is
 * indexed by an unsigned byte, so it must have 256 entries. Each entry
 * is read as an unsigned byte.
 *
 * The surface dimensions are kept in ints, as SDL stores them, so that
 * surfaces larger than 65535 pixels in a dimension are not truncated.
 *
 * The GIL is released while the pixels are processed.
 */
void imageblend32_core_std(PyObject *pysrca, PyObject *pysrcb,
                           PyObject *pydst, PyObject *pyimg,
                           int alpha_off, char *amap) {

    SDL_Surface *srca;
    SDL_Surface *srcb;
    SDL_Surface *dst;
    SDL_Surface *img;

    int srcapitch, srcbpitch, dstpitch, imgpitch;
    int dstw, dsth;
    int y;

    unsigned char *srcapixels;
    unsigned char *srcbpixels;
    unsigned char *dstpixels;
    unsigned char *imgpixels;

    srca = PySurface_AsSurface(pysrca);
    srcb = PySurface_AsSurface(pysrcb);
    dst = PySurface_AsSurface(pydst);
    img = PySurface_AsSurface(pyimg);

    Py_BEGIN_ALLOW_THREADS

    srcapixels = (unsigned char *) srca->pixels;
    srcbpixels = (unsigned char *) srcb->pixels;
    dstpixels = (unsigned char *) dst->pixels;
    imgpixels = (unsigned char *) img->pixels;
    srcapitch = srca->pitch;
    srcbpitch = srcb->pitch;
    dstpitch = dst->pitch;
    imgpitch = img->pitch;

    dstw = dst->w;
    dsth = dst->h;

    for (y = 0; y < dsth; y++) {

        unsigned int *dp = (unsigned int *)(dstpixels + dstpitch * y);
        unsigned int *dpe = dp + dstw;

        unsigned int *sap = (unsigned int *)(srcapixels + srcapitch * y);
        unsigned int *sbp = (unsigned int *)(srcbpixels + srcbpitch * y);

        // Points at the control byte of the first pixel on this line.
        unsigned char *ip = (unsigned char *)(imgpixels + imgpitch * y);
        ip += alpha_off;

        while (dp < dpe) {
            // Look up the weight for this pixel, then move on to the
            // control byte of the next pixel.
            unsigned char alpha = (unsigned char) amap[*ip];
            ip += 4;

            unsigned int sal = *sap++;
            unsigned int sbl = *sbp++;

            // Split each pixel into two channel pairs, so that two
            // channels are interpolated per multiply.
            unsigned int sah = (sal >> 8) & 0xff00ff;
            unsigned int sbh = (sbl >> 8) & 0xff00ff;

            sal &= 0xff00ff;
            sbl &= 0xff00ff;

            *dp++ = I(sal, sbl, alpha) | (I(sah, sbh, alpha) << 8);
        }
    }

    Py_END_ALLOW_THREADS
}
1546 
1547 
/* Entry point for the image-controlled 32-bit blend. Passes every
 * argument through to imageblend32_core_std unchanged; aoff is passed
 * as that function's alpha_off.
 */
void imageblend32_core(PyObject *pysrca, PyObject *pysrcb,
                       PyObject *pydst, PyObject *pyimg,
                       int aoff, char *amap) {

    imageblend32_core_std(pysrca,
                          pysrcb,
                          pydst,
                          pyimg,
                          aoff,
                          amap);
}
1554 
1555 
/* Applies a 4x5 color matrix to every pixel of a 32-bit surface.
 *
 * pysrc - The source pygame surface, read as 4 bytes per pixel.
 * pydst - The destination pygame surface, written as 4 bytes per pixel.
 * Its width and height decide how many pixels are processed.
 * cN0 .. cN3 - Row N of the matrix: the weights of the four source
 * bytes (in memory order) that make up output byte N.
 * cN4 - Constant term of row N, as a fraction of full scale. It is
 * multiplied by 255 and truncated to an int before being added.
 *
 * For each pixel, output byte N is
 *     oN + (int) (cN0 * s0 + cN1 * s1 + cN2 * s2 + cN3 * s3)
 * clamped to the range 0 to 255.
 *
 * The surface dimensions are kept in ints, as SDL stores them, so that
 * surfaces larger than 65535 pixels in a dimension are not truncated.
 *
 * The GIL is released while the pixels are processed.
 */
void colormatrix32_core(PyObject *pysrc, PyObject *pydst,
                        float c00, float c01, float c02, float c03, float c04,
                        float c10, float c11, float c12, float c13, float c14,
                        float c20, float c21, float c22, float c23, float c24,
                        float c30, float c31, float c32, float c33, float c34) {

    SDL_Surface *src;
    SDL_Surface *dst;

    int srcpitch, dstpitch;
    int dstw, dsth;
    int y;

    unsigned char *srcpixels;
    unsigned char *dstpixels;

    src = PySurface_AsSurface(pysrc);
    dst = PySurface_AsSurface(pydst);

    Py_BEGIN_ALLOW_THREADS

    srcpixels = (unsigned char *) src->pixels;
    dstpixels = (unsigned char *) dst->pixels;
    srcpitch = src->pitch;
    dstpitch = dst->pitch;

    dstw = dst->w;
    dsth = dst->h;

    // The constant term of each row, scaled to the 0-255 byte range.
    int o0 = c04 * 255;
    int o1 = c14 * 255;
    int o2 = c24 * 255;
    int o3 = c34 * 255;

    for (y = 0; y < dsth; y++) {

        int r;

        unsigned char *dp =  dstpixels + dstpitch * y;
        unsigned char *dpe = dp + dstw * 4;
        unsigned char *sp = srcpixels + srcpitch * y;

        while (dp < dpe) {
            unsigned char s0 = *sp++;
            unsigned char s1 = *sp++;
            unsigned char s2 = *sp++;
            unsigned char s3 = *sp++;

            r = o0 + (int) (c00 * s0 + c01 * s1 + c02 * s2 + c03 * s3);
            if (r < 0) r = 0;
            if (r > 255) r = 255;
            *dp++ = r;

            r = o1 + (int) (c10 * s0 + c11 * s1 + c12 * s2 + c13 * s3);
            if (r < 0) r = 0;
            if (r > 255) r = 255;
            *dp++ = r;

            r = o2 + (int) (c20 * s0 + c21 * s1 + c22 * s2 + c23 * s3);
            if (r < 0) r = 0;
            if (r > 255) r = 255;
            *dp++ = r;

            r = o3 + (int) (c30 * s0 + c31 * s1 + c32 * s2 + c33 * s3);
            if (r < 0) r = 0;
            if (r > 255) r = 255;
            *dp++ = r;
        }
    }

    Py_END_ALLOW_THREADS
}
1637 
/* Reduces a 32-bit surface to one byte per pixel, using a weighted sum
 * of the four source bytes followed by a table lookup.
 *
 * pysrc - The source pygame surface, read as 4 bytes per pixel.
 * pydst - The destination pygame surface, written as 1 byte per pixel.
 * Its width and height decide how many pixels are processed.
 * rmul, gmul, bmul, amul - The weights of the first, second, third and
 * fourth byte of each source pixel, in memory order. NOTE(review): the
 * names suggest an R, G, B, A byte order - confirm against the caller.
 * shift - Number of bits the weighted sum is shifted right by.
 * vmap - Table indexed by the shifted sum. The caller must make sure
 * that it is large enough for every index that can be produced.
 *
 * The surface dimensions are kept in ints, as SDL stores them, so that
 * surfaces larger than 65535 pixels in a dimension are not truncated.
 *
 * The GIL is released while the pixels are processed.
 */
void staticgray_core(PyObject *pysrc, PyObject *pydst,
                     int rmul, int gmul, int bmul, int amul, int shift, char *vmap) {

    SDL_Surface *src;
    SDL_Surface *dst;

    int srcpitch, dstpitch;
    int dstw, dsth;
    int x, y;

    unsigned char *srcpixels;
    unsigned char *dstpixels;

    src = PySurface_AsSurface(pysrc);
    dst = PySurface_AsSurface(pydst);

    Py_BEGIN_ALLOW_THREADS;

    srcpixels = (unsigned char *) src->pixels;
    dstpixels = (unsigned char *) dst->pixels;
    srcpitch = src->pitch;
    dstpitch = dst->pitch;

    dstw = dst->w;
    dsth = dst->h;

    for (y = 0; y < dsth; y++) {
        unsigned char *s = &srcpixels[y * srcpitch];
        unsigned char *d = &dstpixels[y * dstpitch];

        for (x = 0; x < dstw; x++) {
            int sum = 0;

            // Weighted sum of the four bytes of this source pixel.
            sum += *s++ * rmul;
            sum += *s++ * gmul;
            sum += *s++ * bmul;
            sum += *s++ * amul;

            *d++ = (unsigned char) vmap[sum >> shift];
        }
    }

    Py_END_ALLOW_THREADS;
}
1681