//
// Microsoft C (MSVC) inline assembler versions of the pragmas routines (32-bit x86)
//
4
5 //{{{
6
7 #ifdef pragmas_h_
8 #ifndef pragmas_x86_h_
9 #define pragmas_x86_h_
10
11 #define pragmas_have_mulscale
12
mulscale(int32_t a,int32_t d,int32_t c)13 static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
14 {
15 _asm {
16 mov ecx, c
17 mov eax, a
18 imul d
19 shrd eax, edx, cl
20 }
21 }
22
23 #define EDUKE32_SCALER_PRAGMA(x) \
24 static __inline int32_t mulscale##x (int32_t a, int32_t d) \
25 { \
26 _asm mov eax, a \
27 _asm imul d \
28 _asm shrd eax, edx, x \
29 } \
30 static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
31 { \
32 _asm mov eax, a \
33 _asm imul d \
34 _asm mov ebx, eax \
35 _asm mov eax, S \
36 _asm mov esi, edx \
37 _asm imul D \
38 _asm add eax, ebx \
39 _asm adc edx, esi \
40 _asm shrd eax, edx, x \
41 } \
42 static __inline int32_t tmulscale##x (int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \
43 { \
44 _asm mov eax, a \
45 _asm mov ebx, b \
46 _asm imul d \
47 _asm xchg eax, ebx \
48 _asm mov ecx, c \
49 _asm xchg edx, ecx \
50 _asm imul edx \
51 _asm add ebx, eax \
52 _asm adc ecx, edx \
53 _asm mov eax, S \
54 _asm imul D \
55 _asm add eax, ebx \
56 _asm adc edx, ecx \
57 _asm shrd eax, edx, x \
58 } \
59
60 EDUKE32_GENERATE_PRAGMAS
61 #undef EDUKE32_SCALER_PRAGMA
62
mulscale32(int32_t a,int32_t d)63 static __inline int32_t mulscale32(int32_t a, int32_t d)
64 {
65 _asm {
66 mov eax, a
67 imul d
68 mov eax, edx
69 }
70 }
71
dmulscale(int32_t a,int32_t d,int32_t S,int32_t D,int32_t c)72 static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c)
73 {
74 _asm {
75 mov ecx, c
76 mov eax, a
77 imul d
78 mov ebx, eax
79 mov eax, S
80 mov esi, edx
81 imul D
82 add eax, ebx
83 adc edx, esi
84 shrd eax, edx, cl
85 }
86 }
87
dmulscale32(int32_t a,int32_t d,int32_t S,int32_t D)88 static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
89 {
90 _asm {
91 mov eax, a
92 imul d
93 mov ebx, eax
94 mov eax, S
95 mov esi, edx
96 imul D
97 add eax, ebx
98 adc edx, esi
99 mov eax, edx
100 }
101 }
102
tmulscale32(int32_t a,int32_t d,int32_t b,int32_t c,int32_t S,int32_t D)103 static __inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
104 {
105 _asm {
106 mov eax, a
107 mov ebx, b
108 imul d
109 xchg eax, ebx
110 mov ecx, c
111 xchg edx, ecx
112 imul edx
113 add ebx, eax
114 adc ecx, edx
115 mov eax, S
116 imul D
117 add eax, ebx
118 adc edx, ecx
119 mov eax, edx
120 }
121 }
122
123 #define pragmas_have_clearbuf
124
clearbuf(void * d,int32_t c,int32_t a)125 static __inline void clearbuf(void *d, int32_t c, int32_t a)
126 {
127 _asm {
128 mov edi, d
129 mov ecx, c
130 mov eax, a
131 rep stosd
132 }
133 }
134
135 #define pragmas_have_clearbufbyte
136
clearbufbyte(void * d,int32_t c,int32_t a)137 static __inline void clearbufbyte(void *d, int32_t c, int32_t a)
138 {
139 _asm {
140 mov edi, d
141 mov ecx, c
142 mov eax, a
143 cmp ecx, 4
144 jae longcopy
145 test cl, 1
146 jz preskip
147 stosb
148 preskip :
149 shr ecx, 1
150 rep stosw
151 jmp endit
152 longcopy :
153 test edi, 1
154 jz skip1
155 stosb
156 dec ecx
157 skip1 :
158 test edi, 2
159 jz skip2
160 stosw
161 sub ecx, 2
162 skip2 :
163 mov ebx, ecx
164 shr ecx, 2
165 rep stosd
166 test bl, 2
167 jz skip3
168 stosw
169 skip3 :
170 test bl, 1
171 jz endit
172 stosb
173 endit :
174 }
175 }
176
177 #define pragmas_have_copybuf
178
copybuf(const void * s,void * d,int32_t c)179 static __inline void copybuf(const void *s, void *d, int32_t c)
180 {
181 _asm {
182 mov esi, s
183 mov edi, d
184 mov ecx, c
185 rep movsd
186 }
187 }
188
189 #define pragmas_have_copybufbyte
190
copybufbyte(const void * s,void * d,int32_t c)191 static __inline void copybufbyte(const void *s, void *d, int32_t c)
192 {
193 _asm {
194 mov esi, s
195 mov edi, d
196 mov ecx, c
197 cmp ecx, 4
198 jae longcopy
199 test cl, 1
200 jz preskip
201 movsb
202 preskip :
203 shr ecx, 1
204 rep movsw
205 jmp endit
206 longcopy :
207 test edi, 1
208 jz skip1
209 movsb
210 dec ecx
211 skip1 :
212 test edi, 2
213 jz skip2
214 movsw
215 sub ecx, 2
216 skip2 :
217 mov ebx, ecx
218 shr ecx, 2
219 rep movsd
220 test bl, 2
221 jz skip3
222 movsw
223 skip3 :
224 test bl, 1
225 jz endit
226 movsb
227 endit :
228 }
229 }
230
231 #define pragmas_have_copybufreverse
232
copybufreverse(const void * s,void * d,int32_t c)233 static __inline void copybufreverse(const void *s, void *d, int32_t c)
234 {
235 _asm {
236 mov esi, s
237 mov edi, d
238 mov ecx, c
239 shr ecx, 1
240 jnc skipit1
241 mov al, byte ptr[esi]
242 dec esi
243 mov byte ptr[edi], al
244 inc edi
245 skipit1 :
246 shr ecx, 1
247 jnc skipit2
248 mov ax, word ptr[esi-1]
249 sub esi, 2
250 ror ax, 8
251 mov word ptr[edi], ax
252 add edi, 2
253 skipit2:
254 test ecx, ecx
255 jz endloop
256 begloop :
257 mov eax, dword ptr[esi-3]
258 sub esi, 4
259 bswap eax
260 mov dword ptr[edi], eax
261 add edi, 4
262 dec ecx
263 jnz begloop
264 endloop :
265 }
266 }
267
268 #define pragmas_have_qinterpolatedown16
269
qinterpolatedown16(int32_t a,int32_t c,int32_t d,int32_t s)270 static __inline void qinterpolatedown16(int32_t a, int32_t c, int32_t d, int32_t s)
271 {
272 _asm {
273 mov eax, a
274 mov ecx, c
275 mov edx, d
276 mov esi, s
277 mov ebx, ecx
278 shr ecx, 1
279 jz skipbegcalc
280 begqcalc :
281 lea edi, [edx+esi]
282 sar edx, 16
283 mov dword ptr[eax], edx
284 lea edx, [edi+esi]
285 sar edi, 16
286 mov dword ptr[eax+4], edi
287 add eax, 8
288 dec ecx
289 jnz begqcalc
290 test ebx, 1
291 jz skipbegqcalc2
292 skipbegcalc :
293 sar edx, 16
294 mov dword ptr[eax], edx
295 skipbegqcalc2 :
296 }
297 }
298
qinterpolatedown16short(int32_t a,int32_t c,int32_t d,int32_t s)299 static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, int32_t s)
300 {
301 _asm {
302 mov eax, a
303 mov ecx, c
304 mov edx, d
305 mov esi, s
306 test ecx, ecx
307 jz endit
308 test al, 2
309 jz skipalignit
310 mov ebx, edx
311 sar ebx, 16
312 mov word ptr[eax], bx
313 add edx, esi
314 add eax, 2
315 dec ecx
316 jz endit
317 skipalignit :
318 sub ecx, 2
319 jc finishit
320 begqcalc :
321 mov ebx, edx
322 add edx, esi
323 sar ebx, 16
324 mov edi, edx
325 and edi, 0ffff0000h
326 add edx, esi
327 add ebx, edi
328 mov dword ptr[eax], ebx
329 add eax, 4
330 sub ecx, 2
331 jnc begqcalc
332 test cl, 1
333 jz endit
334 finishit :
335 mov ebx, edx
336 sar ebx, 16
337 mov word ptr[eax], bx
338 endit :
339 }
340 }
341
342 #define pragmas_have_klabs
343
klabs(int32_t a)344 static __inline int32_t klabs(int32_t a)
345 {
346 _asm {
347 mov eax, a
348 test eax, eax
349 jns skipnegate
350 neg eax
351 skipnegate :
352 }
353 }
354
355 #define pragmas_have_ksgn
356
ksgn(int32_t b)357 static __inline int ksgn(int32_t b)
358 {
359 _asm {
360 mov ebx, b
361 add ebx, ebx
362 sbb eax, eax
363 cmp eax, ebx
364 adc al, 0
365 }
366 }
367
368 #define pragmas_have_swaps
369
swapchar(void * a,void * b)370 static __inline void swapchar(void *a, void *b)
371 {
372 _asm {
373 mov eax, a
374 mov ebx, b
375 mov cl, [eax]
376 mov ch, [ebx]
377 mov[ebx], cl
378 mov[eax], ch
379 }
380 }
381
swapshort(void * a,void * b)382 static __inline void swapshort(void *a, void *b)
383 {
384 _asm {
385 mov eax, a
386 mov ebx, b
387 mov cx, [eax]
388 mov dx, [ebx]
389 mov[ebx], cx
390 mov[eax], dx
391 }
392 }
393
swaplong(void * a,void * b)394 static __inline void swaplong(void *a, void *b)
395 {
396 _asm {
397 mov eax, a
398 mov ebx, b
399 mov ecx, [eax]
400 mov edx, [ebx]
401 mov[ebx], ecx
402 mov[eax], edx
403 }
404 }
405
406 #define swapfloat swaplong
407
swapbuf4(void * a,void * b,int32_t c)408 static __inline void swapbuf4(void *a, void *b, int32_t c)
409 {
410 _asm {
411 mov eax, a
412 mov ebx, b
413 mov ecx, c
414 begswap :
415 mov esi, [eax]
416 mov edi, [ebx]
417 mov[ebx], esi
418 mov[eax], edi
419 add eax, 4
420 add ebx, 4
421 dec ecx
422 jnz short begswap
423 }
424 }
425
swap64bit(void * a,void * b)426 static __inline void swap64bit(void *a, void *b)
427 {
428 _asm {
429 mov eax, a
430 mov ebx, b
431 mov ecx, [eax]
432 mov edx, [ebx]
433 mov[ebx], ecx
434 mov ecx, [eax+4]
435 mov[eax], edx
436 mov edx, [ebx+4]
437 mov[ebx+4], ecx
438 mov[eax+4], edx
439 }
440 }
441
442 #define swapdouble swap64bit
443
444 //swapchar2(ptr1,ptr2,xsiz); is the same as:
445 //swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
swapchar2(void * a,void * b,int32_t s)446 static __inline void swapchar2(void *a, void *b, int32_t s)
447 {
448 _asm {
449 mov eax, a
450 mov ebx, b
451 mov esi, s
452 add esi, ebx
453 mov cx, [eax]
454 mov dl, [ebx]
455 mov[ebx], cl
456 mov dh, [esi]
457 mov[esi], ch
458 mov[eax], dx
459 }
460 }
461
462 #define pragmas_have_krecipasm
463
464 //0x007ff000 is (11<<13), 0x3f800000 is (127<<23)
krecipasm(int32_t a)465 static inline int32_t krecipasm(int32_t a)
466 {
467 _asm
468 {
469 push ebx
470 mov eax, a
471 mov fpuasm, eax
472 fild dword ptr fpuasm
473 add eax, eax
474 fstp dword ptr fpuasm
475 sbb ebx, ebx
476 mov eax, fpuasm
477 mov ecx, eax
478 and eax, 0x007ff000
479 shr eax, 10
480 sub ecx, 0x3f800000
481 shr ecx, 23
482 mov eax, dword ptr reciptable[eax]
483 sar eax, cl
484 xor eax, ebx
485 pop ebx
486 }
487 }
488
489 //}}}
490
491 #endif // pragmas_x86_h_
492 #endif // pragmas_h_
493