1 // A.ASM replacement using C
2 // Mainly by Ken Silverman, with things melded with my port by
3 // Jonathon Fowler (jf@jonof.id.au)
4 //
5 // "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
6 // Ken Silverman's official web site: "http://www.advsys.net/ken"
7 // See the included license file "BUILDLIC.TXT" for license info.
8 //
9 // This file has been modified from Ken Silverman's original release
10 // by Jonathon Fowler (jf@jonof.id.au)
11 // by the EDuke32 team (development@voidpoint.com)
12 
13 #include "a.h"
14 #include "pragmas.h"
15 
16 #ifdef ENGINE_USING_A_C
17 
// BITSOFPRECISIONPOW (8) equals 1<<BITSOFPRECISION.
#define BITSOFPRECISION 3
#define BITSOFPRECISIONPOW 8

// Compile code to saturate vplc for sprites to prevent stray lines at the
// bottom of non-y-flipped ones?
#define USE_SATURATE_VPLC
// Also for translucent masks?
//#define USE_SATURATE_VPLC_TRANS

// Defined elsewhere. The routines below read extra arguments from asm1..asm3
// and some of them write results back to asm1/asm2.
extern intptr_t asm1;
extern intptr_t asm2;
extern intptr_t asm3;
extern intptr_t asm4;
extern int32_t globalx3;
extern int32_t globaly3;

#ifdef USE_ASM64
// Mirrors a setting into the matching a64.yasm variable.
# define A64_ASSIGN(var, val) var=val

// variables for a64.yasm
int32_t a64_bpl;
int32_t a64_transmode;
int32_t a64_glogy;
intptr_t a64_paloffs;
char *a64_gtrans;
#else
// Without USE_ASM64 there is nothing to mirror.
# define A64_ASSIGN(var, val)
#endif

// State shared between the setup functions and the drawing loops of this file.
static int32_t bpl;
static int32_t transmode = 0;
static char *gbuf;
static int32_t glogx;
static int32_t glogy;
int32_t gpinc;
static int32_t gbxinc;
static int32_t gbyinc;
static char *gpal;
static char *ghlinepal;
static char *gtrans;
static char *gpal2;
50 
51 //Global variable functions
setvlinebpl(int32_t dabpl)52 void setvlinebpl(int32_t dabpl) { A64_ASSIGN(a64_bpl, dabpl); bpl = dabpl;}
fixtransluscence(intptr_t datransoff)53 void fixtransluscence(intptr_t datransoff)
54 {
55     A64_ASSIGN(a64_gtrans, (char *)datransoff);
56     gtrans = (char *)datransoff;
57 }
settransnormal(void)58 void settransnormal(void) { A64_ASSIGN(a64_transmode, 0); transmode = 0; }
settransreverse(void)59 void settransreverse(void) { A64_ASSIGN(a64_transmode, 1); transmode = 1; }
60 
61 
62 ///// Ceiling/floor horizontal line functions /////
63 
sethlinesizes(int32_t logx,int32_t logy,intptr_t bufplc)64 void sethlinesizes(int32_t logx, int32_t logy, intptr_t bufplc)
65 { glogx = logx; glogy = logy; gbuf = (char *)bufplc; }
setpalookupaddress(char * paladdr)66 void setpalookupaddress(char *paladdr) { ghlinepal = paladdr; }
setuphlineasm4(int32_t bxinc,int32_t byinc)67 void setuphlineasm4(int32_t bxinc, int32_t byinc) { gbxinc = bxinc; gbyinc = byinc; }
// Draws a horizontal ceiling/floor span of cnt+1 pixels, starting at p and
// moving towards lower addresses.
//  cnt:          number of pixels minus one
//  skiploadincs: when zero, the steps are first reloaded from asm1/asm2
//  paloffs:      offset into ghlinepal of the lookup table to use
//  by, bx:       32-bit texture coordinates of the first pixel; the top
//                glogy/glogx bits select the texel
//  p:            address of the first destination pixel
void hlineasm4(bssize_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, uint32_t bx, intptr_t p)
{
    Bassert(gbuf);

    if (!skiploadincs)
    {
        gbxinc = asm1;
        gbyinc = asm2;
    }

    const char *const A_C_RESTRICT shade = &ghlinepal[paloffs];
    const char *const A_C_RESTRICT texels = gbuf;
    const vec2_t step = { gbxinc, gbyinc };
    const vec2_t bits = { glogx, glogy };
    const vec2_t shr = { 32-bits.x, 32-bits.y };
    char *dest = (char *)p;

#ifdef CLASSIC_SLICE_BY_4
    // Four pixels per iteration for as long as at least four remain.
    while (cnt >= 4)
    {
        const uint32_t bx1 = bx-step.x, by1 = by-step.y;
        const uint32_t bx2 = bx-(step.x<<1), by2 = by-(step.y<<1);
        const uint32_t bx3 = bx-(step.x*3), by3 = by-(step.y*3);

        dest[0] = shade[texels[((bx>>shr.x)<<bits.y)+(by>>shr.y)]];
        dest[-1] = shade[texels[((bx1>>shr.x)<<bits.y)+(by1>>shr.y)]];
        dest[-2] = shade[texels[((bx2>>shr.x)<<bits.y)+(by2>>shr.y)]];
        dest[-3] = shade[texels[((bx3>>shr.x)<<bits.y)+(by3>>shr.y)]];

        bx -= step.x<<2;
        by -= step.y<<2;
        cnt -= 4;
        dest -= 4;
    }
#endif

    // Remaining pixels; the >= comparison is what yields cnt+1 pixels in total.
    while (cnt >= 0)
    {
        *dest = shade[texels[((bx>>shr.x)<<bits.y)+(by>>shr.y)]];
        bx -= step.x;
        by -= step.y;
        cnt--;
        dest--;
    }
}
107 
108 
109 ///// Sloped ceiling/floor vertical line functions /////
// Draws cnt pixels of a sloped ceiling/floor column.
//  p:          address of the first destination pixel; advanced by gpinc per pixel
//  i:          overwritten on every iteration with a sloptable entry
//  slopaloffs: address of an array of lookup-table addresses, read downwards
//              (one entry per pixel)
//  cnt:        number of pixels; nothing is drawn when cnt <= 0
//  bx, by:     base texture coordinates
// asm3 supplies the initial sloptable position and asm1>>3 its per-pixel step.
void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, bssize_t cnt, int32_t bx, int32_t by)
{
    const int32_t bzinc = (asm1>>3);
    int32_t bz = asm3;
    intptr_t * A_C_RESTRICT slopalptr = (intptr_t *)slopaloffs;

    while (cnt > 0)
    {
        i = sloptable[(bz>>6)+HALFSLOPTABLESIZ];
        bz += bzinc;

        const uint32_t u = bx+(inthi_t)globalx3*i;
        const uint32_t v = by+(inthi_t)globaly3*i;
        const char *const shade = (const char *)slopalptr[0];

        *(char *)p = shade[gbuf[((u>>(32-glogx))<<glogy)+(v>>(32-glogy))]];

        slopalptr--;
        p += gpinc;
        cnt--;
    }
}
128 
129 
130 ///// Wall,face sprite/wall sprite vertical line functions /////
131 
132 
133 extern int32_t globaltilesizy;
134 
// Returns the upper 32 bits of the 64-bit product a*b.
static inline uint32_t ourmulscale32(uint32_t a, uint32_t b)
{
    const uint64_t product = (uint64_t)a * b;
    return (uint32_t)(product >> 32);
}
139 
getpix(int32_t logy,const char * buf,uint32_t vplc)140 static inline int32_t getpix(int32_t logy, const char *buf, uint32_t vplc)
141 {
142     return logy ? buf[vplc>>logy] : buf[ourmulscale32(vplc,globaltilesizy)];
143 }
144 
setupvlineasm(int32_t neglogy)145 void setupvlineasm(int32_t neglogy) { glogy = neglogy; }
146 // cnt+1 loop iterations!
vlineasm1(int32_t vinc,intptr_t paloffs,bssize_t cnt,uint32_t vplc,intptr_t bufplc,intptr_t p)147 int32_t vlineasm1(int32_t vinc, intptr_t paloffs, bssize_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
148 {
149     const char *const A_C_RESTRICT buf = (char *)bufplc;
150     const char *const A_C_RESTRICT pal = (char *)paloffs;
151     const int32_t logy = glogy, ourbpl = bpl;
152     char *pp = (char *)p;
153 
154     cnt++;
155 
156     if (logy)
157     {
158 #ifdef CLASSIC_SLICE_BY_4
159         for (; cnt>=4; cnt-=4)
160         {
161             *pp = pal[buf[vplc>>logy]];
162             *(pp+ourbpl) = pal[buf[(vplc+vinc)>>logy]];
163             *(pp+(ourbpl<<1)) = pal[buf[(vplc+(vinc<<1))>>logy]];
164             *(pp+(ourbpl*3)) = pal[buf[(vplc+(vinc*3))>>logy ]];
165             pp += ourbpl<<2;
166             vplc += vinc<<2;
167         }
168 #endif
169         while (cnt--)
170         {
171             *pp = pal[buf[vplc>>logy]];
172             pp += ourbpl;
173             vplc += vinc;
174         }
175     }
176     else
177     {
178 #ifdef CLASSIC_SLICE_BY_4
179         for (; cnt>=4; cnt-=4)
180         {
181             *pp = pal[buf[ourmulscale32(vplc, globaltilesizy)]];
182             *(pp+ourbpl) = pal[buf[ourmulscale32((vplc+vinc),globaltilesizy)]];
183             *(pp+(ourbpl<<1)) = pal[buf[ourmulscale32((vplc+(vinc<<1)), globaltilesizy)]];
184             *(pp+(ourbpl*3)) = pal[buf[ourmulscale32((vplc+(vinc*3)), globaltilesizy)]];
185             pp += ourbpl<<2;
186             vplc += vinc<<2;
187         }
188 #endif
189         while (cnt--)
190         {
191             *pp = pal[buf[ourmulscale32(vplc,globaltilesizy)]], pp += ourbpl;
192             vplc += vinc;
193         }
194     }
195     return vplc;
196 }
197 
198 
199 extern intptr_t palookupoffse[4];
200 extern uint32_t vplce[4];
201 extern int32_t vince[4];
202 extern intptr_t bufplce[4];
203 
204 #if (EDUKE32_GCC_PREREQ(4,7) || __has_extension(attribute_ext_vector_type)) && defined BITNESS64
205 // XXX: The "Ubuntu clang version 3.5-1ubuntu1 (trunk) (based on LLVM 3.5)"
206 // does not compile us with USE_VECTOR_EXT. Maybe a newer one does?
207 # if !defined __clang__
208 #  define USE_VECTOR_EXT
209 # endif
210 #endif
211 
212 #ifdef USE_VECTOR_EXT
213 typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16)));
214 #endif
215 
// saturate_vplc(vplc, vinc): to be used right after 'vplc += vinc'. If the
// addition wrapped around (vplc < vinc as unsigned) and g_saturate is -1,
// all bits of vplc are set; otherwise vplc is left unchanged.
// Arguments and expansions are parenthesized so that any lvalue/expression can
// be passed safely. 'vplc' is evaluated twice: pass expressions without side
// effects.
#ifdef USE_SATURATE_VPLC
# define saturate_vplc(vplc, vinc) ((vplc) |= g_saturate & -((vplc) < (uint32_t)(vinc)))
// NOTE: the vector types yield -1 for logical "true":
# define saturate_vplc_vec(vplc, vinc) ((vplc) |= g_saturate & ((vplc) < (vinc)))
# ifdef USE_SATURATE_VPLC_TRANS
#  define saturate_vplc_trans(vplc, vinc) saturate_vplc(vplc, vinc)
# else
#  define saturate_vplc_trans(vplc, vinc)
# endif
#else
// Saturation compiled out: all three expand to nothing.
# define saturate_vplc(vplc, vinc)
# define saturate_vplc_vec(vplc, vinc)
# define saturate_vplc_trans(vplc, vinc)
#endif
230 
#ifdef CLASSIC_NONPOW2_YSIZE_WALLS
// cnt >= 1
// Four-column opaque drawer for glogy == 0: each texel index is the high half
// of vplc*globaltilesizy. Draws cnt rows of four adjacent pixels starting at p,
// then stores the final column positions into vplce.
static void vlineasm4nlogy(bssize_t cnt, char *p, char *const A_C_RESTRICT * pal, char *const A_C_RESTRICT * buf,
# ifdef USE_VECTOR_EXT
    uint32_vec4 vplc, const uint32_vec4 vinc)
# else
    uint32_t * vplc, const int32_t *vinc)
# endif
{
    const int32_t pitch = bpl;

    for (;;)
    {
        p[0] = pal[0][buf[0][ourmulscale32(vplc[0], globaltilesizy)]];
        p[1] = pal[1][buf[1][ourmulscale32(vplc[1], globaltilesizy)]];
        p[2] = pal[2][buf[2][ourmulscale32(vplc[2], globaltilesizy)]];
        p[3] = pal[3][buf[3][ourmulscale32(vplc[3], globaltilesizy)]];

# if defined USE_VECTOR_EXT
        vplc += vinc;
# else
        vplc[0] += vinc[0];
        vplc[1] += vinc[1];
        vplc[2] += vinc[2];
        vplc[3] += vinc[3];
# endif
        p += pitch;

        if (--cnt == 0)
            break;
    }

    Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
}
#endif
263 
264 // cnt >= 1
vlineasm4(bssize_t cnt,char * p)265 void vlineasm4(bssize_t cnt, char *p)
266 {
267     char * const A_C_RESTRICT pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
268     char * const A_C_RESTRICT buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
269 #ifdef USE_VECTOR_EXT
270     uint32_vec4 vinc = {(uint32_t)vince[0], (uint32_t)vince[1], (uint32_t)vince[2], (uint32_t)vince[3]};
271     uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
272 #else
273     const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
274     uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
275 #endif
276     const int32_t logy = glogy, ourbpl = bpl;
277 
278 #ifdef CLASSIC_NONPOW2_YSIZE_WALLS
279     if (EDUKE32_PREDICT_FALSE(!logy))
280     {
281         // This should only happen when 'globalshiftval = 0' has been set in engine.c.
282         vlineasm4nlogy(cnt, p, pal, buf, vplc, vinc);
283         return;
284     }
285 #else
286     assert(logy);
287 #endif
288 
289     // just fucking shoot me
290 #ifdef CLASSIC_SLICE_BY_4
291     for (; cnt>=4;cnt-=4)
292     {
293         p[0]                = pal[0][buf[0][ vplc[0]>>logy ]];
294         p[1]                = pal[1][buf[1][ vplc[1]>>logy ]];
295         p[2]                = pal[2][buf[2][ vplc[2]>>logy ]];
296         p[3]                = pal[3][buf[3][ vplc[3]>>logy ]];
297         (p+ourbpl)[0]       = pal[0][buf[0][ (vplc[0]+vinc[0])>>logy ]];
298         (p+ourbpl)[1]       = pal[1][buf[1][ (vplc[1]+vinc[1])>>logy ]];
299         (p+ourbpl)[2]       = pal[2][buf[2][ (vplc[2]+vinc[2])>>logy ]];
300         (p+ourbpl)[3]       = pal[3][buf[3][ (vplc[3]+vinc[3])>>logy ]];
301         (p+(ourbpl<<1))[0]  = pal[0][buf[0][ (vplc[0]+(vinc[0]<<1))>>logy ]];
302         (p+(ourbpl<<1))[1]  = pal[1][buf[1][ (vplc[1]+(vinc[1]<<1))>>logy ]];
303         (p+(ourbpl<<1))[2]  = pal[2][buf[2][ (vplc[2]+(vinc[2]<<1))>>logy ]];
304         (p+(ourbpl<<1))[3]  = pal[3][buf[3][ (vplc[3]+(vinc[3]<<1))>>logy ]];
305         (p+(ourbpl*3))[0]   = pal[0][buf[0][ (vplc[0]+(vinc[0]*3))>>logy ]];
306         (p+(ourbpl*3))[1]   = pal[1][buf[1][ (vplc[1]+(vinc[1]*3))>>logy ]];
307         (p+(ourbpl*3))[2]   = pal[2][buf[2][ (vplc[2]+(vinc[2]*3))>>logy ]];
308         (p+(ourbpl*3))[3]   = pal[3][buf[3][ (vplc[3]+(vinc[3]*3))>>logy ]];
309 
310 #if defined USE_VECTOR_EXT
311         vplc += vinc<<2;
312 #else
313         vplc[0] += vinc[0]<<2;
314         vplc[1] += vinc[1]<<2;
315         vplc[2] += vinc[2]<<2;
316         vplc[3] += vinc[3]<<2;
317 #endif
318         p += ourbpl<<2;
319     }
320 #endif
321 
322     while (cnt--)
323     {
324         p[0] = pal[0][buf[0][vplc[0]>>logy]];
325         p[1] = pal[1][buf[1][vplc[1]>>logy]];
326         p[2] = pal[2][buf[2][vplc[2]>>logy]];
327         p[3] = pal[3][buf[3][vplc[3]>>logy]];
328 
329 #if defined USE_VECTOR_EXT
330         vplc += vinc;
331 #else
332         vplc[0] += vinc[0];
333         vplc[1] += vinc[1];
334         vplc[2] += vinc[2];
335         vplc[3] += vinc[3];
336 #endif
337         p += ourbpl;
338     }
339 
340     Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
341 }
342 
// set_saturate(dosaturate): records whether the masked loops should saturate
// vplc. Any non-zero argument is normalized to -1 so that g_saturate can be
// used directly as a bit mask. The argument is parenthesized so that a
// compound expression (e.g. 'a|b') is normalized as a whole.
#ifdef USE_SATURATE_VPLC
static int32_t g_saturate;  // -1 if saturating vplc is requested, 0 else
# define set_saturate(dosaturate) (g_saturate = -(int)!!(dosaturate))
#else
# define set_saturate(dosaturate) UNREFERENCED_PARAMETER(dosaturate)
#endif
349 
setupmvlineasm(int32_t neglogy,int32_t dosaturate)350 void setupmvlineasm(int32_t neglogy, int32_t dosaturate)
351 {
352     glogy = neglogy;
353     set_saturate(dosaturate);
354 }
355 
356 // cnt+1 loop iterations!
mvlineasm1(int32_t vinc,intptr_t paloffs,bssize_t cnt,uint32_t vplc,intptr_t bufplc,intptr_t p)357 int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, bssize_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
358 {
359     char ch;
360 
361     const char *const A_C_RESTRICT buf = (char *)bufplc;
362     const char *const A_C_RESTRICT pal = (char *)paloffs;
363     const int32_t logy = glogy, ourbpl = bpl;
364     char *pp = (char *)p;
365 
366     cnt++;
367 
368     if (!logy)
369     {
370         do
371         {
372             ch = buf[ourmulscale32(vplc,globaltilesizy)];
373             if (ch != 255) *pp = pal[ch];
374             pp += ourbpl;
375             vplc += vinc;
376             saturate_vplc(vplc, vinc);
377         }
378         while (--cnt);
379 
380         return vplc;
381     }
382 
383     do
384     {
385 
386         if (buf[vplc>>logy] != 255)
387             *pp = pal[buf[vplc>>logy]];
388         pp += ourbpl;
389         vplc += vinc;
390         saturate_vplc(vplc, vinc);
391     }
392     while (--cnt);
393 
394     return vplc;
395 }
396 
397 // cnt >= 1
mvlineasm4(bssize_t cnt,char * p)398 void mvlineasm4(bssize_t cnt, char *p)
399 {
400     char *const A_C_RESTRICT pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
401     char *const A_C_RESTRICT buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
402 #ifdef USE_VECTOR_EXT
403     uint32_vec4 vinc = {(uint32_t)vince[0], (uint32_t)vince[1], (uint32_t)vince[2], (uint32_t)vince[3]};
404     uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
405 #else
406     const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
407     uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
408 #endif
409     const int32_t logy = glogy, ourbpl = bpl;
410     char ch;
411 
412     if (logy)
413     {
414         do
415         {
416             ch = buf[0][vplc[0]>>logy];
417             if (ch != 255) p[0] = pal[0][ch];
418             ch = buf[1][vplc[1]>>logy];
419             if (ch != 255) p[1] = pal[1][ch];
420             ch = buf[2][vplc[2]>>logy];
421             if (ch != 255) p[2] = pal[2][ch];
422             ch = buf[3][vplc[3]>>logy];
423             if (ch != 255) p[3] = pal[3][ch];
424 
425 #if !defined USE_VECTOR_EXT
426             vplc[0] += vinc[0];
427             vplc[1] += vinc[1];
428             vplc[2] += vinc[2];
429             vplc[3] += vinc[3];
430             saturate_vplc(vplc[0], vinc[0]);
431             saturate_vplc(vplc[1], vinc[1]);
432             saturate_vplc(vplc[2], vinc[2]);
433             saturate_vplc(vplc[3], vinc[3]);
434 #else
435             vplc += vinc;
436             saturate_vplc_vec(vplc, vinc);
437 #endif
438             p += ourbpl;
439         }
440         while (--cnt);
441     }
442     else
443     {
444         do
445         {
446             ch = buf[0][ourmulscale32(vplc[0],globaltilesizy)];
447             if (ch != 255) p[0] = pal[0][ch];
448             ch = buf[1][ourmulscale32(vplc[1],globaltilesizy)];
449             if (ch != 255) p[1] = pal[1][ch];
450             ch = buf[2][ourmulscale32(vplc[2],globaltilesizy)];
451             if (ch != 255) p[2] = pal[2][ch];
452             ch = buf[3][ourmulscale32(vplc[3],globaltilesizy)];
453             if (ch != 255) p[3] = pal[3][ch];
454 
455 #if !defined USE_VECTOR_EXT
456             vplc[0] += vinc[0];
457             vplc[1] += vinc[1];
458             vplc[2] += vinc[2];
459             vplc[3] += vinc[3];
460             saturate_vplc(vplc[0], vinc[0]);
461             saturate_vplc(vplc[1], vinc[1]);
462             saturate_vplc(vplc[2], vinc[2]);
463             saturate_vplc(vplc[3], vinc[3]);
464 #else
465             vplc += vinc;
466             saturate_vplc_vec(vplc, vinc);
467 #endif
468             p += ourbpl;
469         }
470         while (--cnt);
471     }
472 
473     Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
474 }
475 
476 #ifdef USE_ASM64
477 # define GLOGY a64_glogy
478 #else
479 # define GLOGY glogy
480 #endif
481 
setuptvlineasm(int32_t neglogy,int32_t dosaturate)482 void setuptvlineasm(int32_t neglogy, int32_t dosaturate)
483 {
484     GLOGY = neglogy;
485     set_saturate(dosaturate);
486 }
487 
488 #if !defined USE_ASM64
489 // cnt+1 loop iterations!
tvlineasm1(int32_t vinc,intptr_t paloffs,bssize_t cnt,uint32_t vplc,intptr_t bufplc,intptr_t p)490 int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, bssize_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
491 {
492     char ch;
493 
494     const char *const A_C_RESTRICT buf = (char *)bufplc;
495     const char *const A_C_RESTRICT pal = (char *)paloffs;
496     const char *const A_C_RESTRICT trans = (char *)gtrans;
497     const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
498     char *pp = (char *)p;
499 
500     cnt++;
501 
502     uint8_t const shift = transm<<3;
503 
504     do
505     {
506         ch = getpix(logy, buf, vplc);
507         if (ch != 255) *pp = trans[((*pp)<<(8-shift))|(pal[ch]<<shift)];
508         pp += ourbpl;
509         vplc += vinc;
510         saturate_vplc_trans(vplc, vinc);
511     }
512     while (--cnt);
513 
514     return vplc;
515 }
516 #endif
517 
setuptvlineasm2(int32_t neglogy,intptr_t paloffs1,intptr_t paloffs2)518 void setuptvlineasm2(int32_t neglogy, intptr_t paloffs1, intptr_t paloffs2)
519 {
520     GLOGY = neglogy;
521     A64_ASSIGN(a64_paloffs, paloffs1);
522     gpal = (char *)paloffs1;
523     gpal2 = (char *)paloffs2;
524 }
525 
526 #if !defined USE_ASM64
527 // Pass: asm1=vinc2, asm2=pend
528 // Return: asm1=vplc1, asm2=vplc2
tvlineasm2(uint32_t vplc2,int32_t vinc1,intptr_t bufplc1,intptr_t bufplc2,uint32_t vplc1,intptr_t p)529 void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc2, uint32_t vplc1, intptr_t p)
530 {
531     char ch;
532 
533     bssize_t cnt = tabledivide32(asm2-p-1, bpl);  // >= 1
534     const int32_t vinc2 = asm1;
535 
536     const char *const A_C_RESTRICT buf1 = (char *)bufplc1;
537     const char *const A_C_RESTRICT buf2 = (char *)bufplc2;
538     const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
539 
540     char *pp = (char *)p;
541 
542     cnt++;
543 
544     uint8_t const shift = transm<<3;
545 
546     do
547     {
548         ch = getpix(logy, buf1, vplc1);
549         if (ch != 255) pp[0] = gtrans[(pp[0]<<(8-shift))|(gpal[ch]<<shift)];
550         vplc1 += vinc1;
551         saturate_vplc_trans(vplc1, vinc1);
552 
553         ch = getpix(logy, buf2, vplc2);
554         if (ch != 255) pp[1] = gtrans[(pp[1]<<(8-shift))|(gpal2[ch]<<shift)];
555         vplc2 += vinc2;
556         saturate_vplc_trans(vplc2, vinc2);
557 
558         pp += ourbpl;
559     }
560     while (--cnt > 0);
561 
562     asm1 = vplc1;
563     asm2 = vplc2;
564 }
565 #endif
566 
567 //Floor sprite horizontal line functions
msethlineshift(int32_t logx,int32_t logy)568 void msethlineshift(int32_t logx, int32_t logy) { glogx = logx; glogy = logy; }
569 // cntup16>>16 + 1 iterations
mhline(intptr_t bufplc,uint32_t bx,int32_t cntup16,int32_t junk,uint32_t by,intptr_t p)570 void mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, intptr_t p)
571 {
572     char ch;
573 
574     const int32_t xinc = asm1, yinc = asm2;
575 
576     UNREFERENCED_PARAMETER(junk);
577 
578     gbuf = (char *)bufplc;
579     gpal = (char *)asm3;
580 
581     cntup16>>=16;
582     cntup16++;
583     do
584     {
585         ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))];
586         if (ch != 255) *((char *)p) = gpal[ch];
587         bx += xinc;
588         by += yinc;
589         p++;
590     }
591     while (--cntup16);
592 }
593 
tsethlineshift(int32_t logx,int32_t logy)594 void tsethlineshift(int32_t logx, int32_t logy) { glogx = logx; glogy = logy; }
595 // cntup16>>16 + 1 iterations
thline(intptr_t bufplc,uint32_t bx,int32_t cntup16,int32_t junk,uint32_t by,intptr_t p)596 void thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, intptr_t p)
597 {
598     char ch;
599 
600     const int32_t xinc = asm1, yinc = asm2;
601 
602     UNREFERENCED_PARAMETER(junk);
603 
604     gbuf = (char *)bufplc;
605     gpal = (char *)asm3;
606 
607     cntup16>>=16;
608     cntup16++;
609 
610     uint8_t const shift = transmode<<3;
611 
612     do
613     {
614         ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))];
615         if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<(8-shift))|(gpal[ch]<<shift)];
616         bx += xinc;
617         by += yinc;
618         p++;
619     }
620     while (--cntup16);
621 }
622 
623 
624 //Rotatesprite vertical line functions
setupspritevline(intptr_t paloffs,int32_t bxinc,int32_t byinc,int32_t ysiz)625 void setupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
626 {
627     gpal = (char *)paloffs;
628     gbxinc = bxinc;
629     gbyinc = byinc;
630     glogy = ysiz;
631 }
// Draws cnt-1 opaque pixels of a rotatesprite column.
//  bx, by: 16.16 fixed-point texture coordinates, stepped by gbxinc/gbyinc
//  cnt:    one more than the number of pixels to draw
//  bufplc: address of the texel buffer (also stored in gbuf)
//  p:      address of the first destination pixel; advanced by bpl per pixel
void spritevline(int32_t bx, int32_t by, bssize_t cnt, intptr_t bufplc, intptr_t p)
{
    char *dest = (char *)p;

    gbuf = (char *)bufplc;

    while (cnt > 1)
    {
        *dest = gpal[gbuf[(bx>>16)*glogy+(by>>16)]];
        bx += gbxinc;
        by += gbyinc;
        dest += bpl;
        cnt--;
    }
}
643 
644 //Rotatesprite vertical line functions
msetupspritevline(intptr_t paloffs,int32_t bxinc,int32_t byinc,int32_t ysiz)645 void msetupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
646 {
647     gpal = (char *)paloffs;
648     gbxinc = bxinc;
649     gbyinc = byinc;
650     glogy = ysiz;
651 }
// Draws cnt-1 masked pixels of a rotatesprite column; texels equal to 255 are
// skipped.
//  bx, by: 16.16 fixed-point texture coordinates, stepped by gbxinc/gbyinc
//  cnt:    one more than the number of pixels to draw
//  bufplc: address of the texel buffer (also stored in gbuf)
//  p:      address of the first destination pixel; advanced by bpl per pixel
//
// Texels are read as uint8_t: with a signed plain char the comparison against
// 255 could never be false and texels >= 128 would index below the table.
void mspritevline(int32_t bx, int32_t by, bssize_t cnt, intptr_t bufplc, intptr_t p)
{
    gbuf = (char *)bufplc;

    const uint8_t *const buf = (const uint8_t *)gbuf;

    for (; cnt>1; cnt--)
    {
        const uint8_t ch = buf[(bx>>16)*glogy+(by>>16)];
        if (ch != 255) (*(char *)p) = gpal[ch];
        bx += gbxinc;
        by += gbyinc;
        p += bpl;
    }
}
666 
tsetupspritevline(intptr_t paloffs,int32_t bxinc,int32_t byinc,int32_t ysiz)667 void tsetupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
668 {
669     gpal = (char *)paloffs;
670     gbxinc = bxinc;
671     gbyinc = byinc;
672     glogy = ysiz;
673 }
// Draws cnt-1 translucent masked pixels of a rotatesprite column. Texels equal
// to 255 are skipped; the others are looked up in gpal and blended with the
// existing pixel through gtrans.
//  bx, by: 16.16 fixed-point texture coordinates, stepped by gbxinc/gbyinc
//  cnt:    one more than the number of pixels to draw
//  bufplc: address of the texel buffer (also stored in gbuf)
//  p:      address of the first destination pixel; advanced by bpl per pixel
//
// All pixel bytes are handled as uint8_t: with a signed plain char the mask
// test could never be false and the gtrans index could become negative.
void tspritevline(int32_t bx, int32_t by, bssize_t cnt, intptr_t bufplc, intptr_t p)
{
    gbuf = (char *)bufplc;

    const uint8_t *const buf = (const uint8_t *)gbuf;
    const uint8_t *const pal = (const uint8_t *)gpal;
    const uint8_t *const trans = (const uint8_t *)gtrans;
    uint8_t *pp = (uint8_t *)p;

    // 0 in normal mode, 8 in reverse mode.
    uint8_t const shift = transmode<<3;

    for (; cnt>1; cnt--)
    {
        const uint8_t ch = buf[(bx>>16)*glogy+(by>>16)];
        // The two bytes occupy disjoint bit ranges, so '|' equals the sum and
        // matches the form used by the other translucent loops.
        if (ch != 255) *pp = trans[((*pp)<<(8-shift))|(pal[ch]<<shift)];
        bx += gbxinc;
        by += gbyinc;
        pp += bpl;
    }
}
691 
setupdrawslab(int32_t dabpl,intptr_t pal)692 void setupdrawslab(int32_t dabpl, intptr_t pal)
693 {
694     bpl  = dabpl;
695     gpal = (char *)pal;
696 }
697 
drawslab(int32_t dx,int32_t v,int32_t dy,int32_t vi,intptr_t vptr,intptr_t p)698 void drawslab(int32_t dx, int32_t v, int32_t dy, int32_t vi, intptr_t vptr, intptr_t p)
699 {
700     do
701     {
702         char const c = gpal[(int32_t)(*(char *)((v>>16)+vptr))];
703         for (int x=0; x < dx; x++)
704             ((char*)p)[x] = c;
705         p += bpl;
706         v += vi;
707     }
708     while (--dy);
709 }
710 
#if 0
// Disabled. Copies source bytes to the destination backwards from p-1 down to
// p-(cnt<<2), reading each byte at rptr+(u>>16) and stepping u by -uinc.
// The incoming value of p0 is ignored.
void stretchhline(intptr_t p0, int32_t u, bssize_t cnt, int32_t uinc, intptr_t rptr, intptr_t p)
{
    p0 = p-(cnt<<2);

    for (;;)
    {
        --p;
        *(char *)p = *(char *)((u>>16)+rptr);
        u -= uinc;

        if (!(p > p0))
            break;
    }
}
#endif
723 
724 #endif
725 /*
726  * vim:ts=4:
727  */
728 
729