1 #include <math.h>
2
3 #include "3dc.h"
4 #include "mathline.h"
5
6 void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
7 void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a);
8 void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c);
9 void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a);
10 void MUL_I_WIDE(int a, int b, LONGLONGCH *c);
11 int CMP_LL(LONGLONGCH *a, LONGLONGCH *b);
12 void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b);
13 void NEG_LL(LONGLONGCH *a);
14 void ASR_LL(LONGLONGCH *a, int shift);
15 void IntToLL(LONGLONGCH *a, int *b);
16 int DIV_FIXED(int a, int b);
17
18 int NarrowDivide(LONGLONGCH *a, int b);
19 int WideMulNarrowDiv(int a, int b, int c);
20 void RotateVector_ASM(VECTORCH *v, MATRIXCH *m);
21 void RotateAndCopyVector_ASM(VECTORCH *v1, VECTORCH *v2, MATRIXCH *m);
22
23 #undef ASM386
24
25 #if !defined(ASM386)
ConvertToLongLong(const LONGLONGCH * llch)26 static __int64 ConvertToLongLong(const LONGLONGCH* llch)
27 {
28 __int64 ll;
29
30 ll = ((__int64)llch->hi32 << 32) | ((__int64)llch->lo32 << 0);
31
32 return ll;
33 }
34
/* Split a native 64-bit value into the two 32-bit halves of a LONGLONGCH. */
static void ConvertFromLongLong(LONGLONGCH* llch, const __int64* ll)
{
	__int64 value = *ll;

	llch->lo32 = (unsigned int)( value        & 0xffffffff);
	llch->hi32 = (  signed int)((value >> 32) & 0xffffffff);
}
40 #endif
41
/* 64-bit addition: *c = *a + *b. */
void ADD_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
{
	/* Original MSVC inline assembly, kept for reference:
	_asm
	{
		mov esi,a
		mov edi,b
		mov ebx,c
		mov eax,[esi]
		mov edx,[esi+4]
		add eax,[edi]
		adc edx,[edi+4]
		mov [ebx],eax
		mov [ebx+4],edx
	}
	*/
#if defined(ASM386)
	/* GNU-syntax port: addl the low words, then adcl the high words so the
	   carry propagates.  NOTE(review): this branch is currently dead --
	   ASM386 is #undef'd at the top of this file. */
	int dummy1, dummy2;
	__asm__("movl 0(%%esi), %0 \n\t"
		"movl 4(%%esi), %1 \n\t"
		"addl 0(%%edi), %0 \n\t"
		"adcl 4(%%edi), %1 \n\t"
		"movl %0, 0(%%ebx) \n\t"
		"movl %1, 4(%%ebx) \n\t"
		: "=&r" (dummy1), "=&r" (dummy2)
		: "S" (a), "D" (b), "b" (c)
		: "memory", "cc"
		);

	/*
	__asm__("movl 0(%%esi), %%eax \n\t"
		"movl 4(%%esi), %%edx \n\t"
		"addl 0(%%edi), %%eax \n\t"
		"adcl 4(%%edi), %%edx \n\t"
		: "=a" (c->lo32), "=d" (c->hi32)
		: "S" (a), "D" (b)
		);
	*/
#else
	/* Portable path: widen both operands to 64 bits, add, split back. */
	__int64 aa = ConvertToLongLong(a);
	__int64 bb = ConvertToLongLong(b);

	__int64 cc = aa + bb;

	ConvertFromLongLong(c, &cc);
#endif

}
90
91 /* ADD ++ */
92
/* 64-bit add-accumulate: *c += *a. */
void ADD_LL_PP(LONGLONGCH *c, LONGLONGCH *a)
{
	/* Original MSVC inline assembly, kept for reference:
	_asm
	{
		mov edi,c
		mov esi,a
		mov eax,[esi]
		mov edx,[esi+4]
		add [edi],eax
		adc [edi+4],edx
	}
	*/

#if defined(ASM386)
	/* addl/adcl directly into *c.  NOTE(review): dead branch -- ASM386 is
	   #undef'd at the top of this file. */
	int dummy1, dummy2;
	__asm__("movl 0(%%esi), %0 \n\t"
		"movl 4(%%esi), %1 \n\t"
		"addl %0, 0(%%edi) \n\t"
		"adcl %1, 4(%%edi) \n\t"
		: "=&r" (dummy1), "=&r" (dummy2)
		: "D" (c), "S" (a)
		: "memory", "cc"
		);
#else
	/* Portable path: widen, accumulate, split back into *c. */
	__int64 cc = ConvertToLongLong(c);
	__int64 aa = ConvertToLongLong(a);

	cc += aa;

	ConvertFromLongLong(c, &cc);
#endif
}
126
127 /* SUB */
128
/* 64-bit subtraction: *c = *a - *b. */
void SUB_LL(LONGLONGCH *a, LONGLONGCH *b, LONGLONGCH *c)
{
	/* Original MSVC inline assembly, kept for reference:
	_asm
	{
		mov esi,a
		mov edi,b
		mov ebx,c
		mov eax,[esi]
		mov edx,[esi+4]
		sub eax,[edi]
		sbb edx,[edi+4]
		mov [ebx],eax
		mov [ebx+4],edx
	}
	*/
#if defined(ASM386)
	/* subl the low words, then sbbl the high words so the borrow
	   propagates.  NOTE(review): dead branch -- ASM386 is #undef'd above. */
	int dummy1, dummy2;
	__asm__("movl 0(%%esi), %0 \n\t"
		"movl 4(%%esi), %1 \n\t"
		"subl 0(%%edi), %0 \n\t"
		"sbbl 4(%%edi), %1 \n\t"
		"movl %0, 0(%%ebx) \n\t"
		"movl %1, 4(%%ebx) \n\t"
		: "=&r" (dummy1), "=&r" (dummy2)
		: "S" (a), "D" (b), "b" (c)
		: "memory", "cc"
		);
#else
	/* Portable path: widen both operands, subtract, split back. */
	__int64 aa = ConvertToLongLong(a);
	__int64 bb = ConvertToLongLong(b);

	__int64 cc = aa - bb;

	ConvertFromLongLong(c, &cc);
#endif
}
166
167 /* SUB -- */
168
/* 64-bit subtract-accumulate: *c -= *a. */
void SUB_LL_MM(LONGLONGCH *c, LONGLONGCH *a)
{
	/* Original MSVC inline assembly, kept for reference:
	_asm
	{
		mov edi,c
		mov esi,a
		mov eax,[esi]
		mov edx,[esi+4]
		sub [edi],eax
		sbb [edi+4],edx
	}
	*/
#if defined(ASM386)
	/* subl/sbbl directly into *c.  NOTE(review): dead branch -- ASM386 is
	   #undef'd at the top of this file. */
	int dummy1, dummy2;
	__asm__("movl 0(%%esi), %0 \n\t"
		"movl 4(%%esi), %1 \n\t"
		"subl %0, 0(%%edi) \n\t"
		"sbbl %1, 4(%%edi) \n\t"
		: "=&r" (dummy1), "=&r" (dummy2)
		: "D" (c), "S" (a)
		: "memory", "cc"
		);
#else
	/* Portable path: widen, subtract, split back into *c. */
	__int64 cc = ConvertToLongLong(c);
	__int64 aa = ConvertToLongLong(a);

	cc -= aa;

	ConvertFromLongLong(c, &cc);
#endif
}
201
202 /*
203
204 MUL
205
206 This is the multiply we use, the 32 x 32 = 64 widening version
207
208 */
209
/* Widening signed multiply: *c = (64-bit) a * b.  A 32x32 -> 64 product
   always fits, so no overflow is possible. */
void MUL_I_WIDE(int a, int b, LONGLONGCH *c)
{
	/* Original MSVC inline assembly, kept for reference:
	_asm
	{
		mov eax,a
		mov ebx,c
		imul b
		mov [ebx],eax
		mov [ebx+4],edx
	}
	*/
#if defined(ASM386)
	/* one-operand imull widens: edx:eax = eax * b; both halves are stored
	   into *c.  NOTE(review): dead branch -- ASM386 is #undef'd above. */
	unsigned int d1;
	__asm__("imull %3 \n\t"
		"movl %%eax, 0(%%ebx) \n\t"
		"movl %%edx, 4(%%ebx) \n\t"
		: "=a" (d1)
		: "0" (a), "b" (c), "m" (b)
		: "%edx", "memory", "cc"
		);
#else
	/* Portable path: promote both factors to 64 bits before multiplying. */
	__int64 aa = (__int64) a;
	__int64 bb = (__int64) b;

	__int64 cc = aa * bb;

	ConvertFromLongLong(c, &cc);
#endif
}
240
241 /*
242
243 CMP
244
245 This substitutes for ==, >, <, >=, <=
246
247 */
248
/* Three-way signed 64-bit compare:
   returns 1 if *a > *b, -1 if *a < *b, 0 if equal. */
int CMP_LL(LONGLONGCH *a, LONGLONGCH *b)
{
	/* Original MSVC inline assembly, kept for reference:
	int retval;
	_asm
	{
		mov ebx,a
		mov ecx,b
		mov eax,[ebx]
		mov edx,[ebx+4]
		sub eax,[ecx]
		sbb edx,[ecx+4]
		and edx,edx
		jne llnz
		and eax,eax
		je llgs
		llnz:
		mov retval,1
		and edx,edx
		jge llgs
		neg retval
		llgs:
	}
	*/
#if defined(ASM386)
	/* Computes the 64-bit difference and derives the sign from its high
	   word.  NOTE(review): dead branch -- ASM386 is #undef'd above. */
	int retval;

	__asm__("movl 0(%%ebx), %%eax \n\t"
		"movl 4(%%ebx), %%edx \n\t"
		"subl 0(%%ecx), %%eax \n\t"
		"sbbl 4(%%ecx), %%edx \n\t"
		"xorl %%ebx, %%ebx \n\t"
		"andl %%edx, %%edx \n\t"
		"jne 0f \n\t" /* llnz */
		"andl %%eax, %%eax \n\t"
		"je 1f \n" /* llgs */
		"0: \n\t" /* llnz */
		"movl $1, %%ebx \n\t"
		"andl %%edx, %%edx \n\t"
		"jge 1f \n\t" /* llgs */
		"negl %%ebx \n"
		"1: \n\t" /* llgs */
		: "=b" (retval)
		: "b" (a), "c" (b)
		: "%eax", "%edx", "memory", "cc"
		);

	return retval;
#else
	/* Portable path: compare the signed high words first; only when they
	   are equal do the low words decide.
	   NOTE(review): correctness of the lo32 comparison assumes lo32 is an
	   unsigned 32-bit field (as the casts in ConvertFromLongLong suggest)
	   -- confirm against the LONGLONGCH declaration in the header. */
	if (a->hi32 > b->hi32)
		return 1;
	else if (a->hi32 < b->hi32)
		return -1;
	else if (a->lo32 > b->lo32)
		return 1;
	else if (a->lo32 < b->lo32)
		return -1;
	else
		return 0;
#endif
}
310
311 /* EQUALS */
312
/* 64-bit assignment: *a = *b.  (Substitutes for '=' on LONGLONGCH.) */
void EQUALS_LL(LONGLONGCH *a, LONGLONGCH *b)
{
	/* Original MSVC inline assembly, kept for reference:
	_asm
	{
		mov edi,a
		mov esi,b
		mov eax,[esi]
		mov edx,[esi+4]
		mov [edi],eax
		mov [edi+4],edx
	}
	*/
#if defined(ASM386)
	/* NOTE(review): dead branch -- ASM386 is #undef'd above. */
	__asm__("movl 0(%%esi), %%eax \n\t"
		"movl 4(%%esi), %%edx \n\t"
		"movl %%eax, 0(%%edi) \n\t"
		"movl %%edx, 4(%%edi) \n\t"
		:
		: "D" (a), "S" (b)
		: "%eax", "%edx", "memory"
		);
#else
	/* A plain struct copy is all that is needed in C. */
	*a = *b;
#endif
}
339
340 /* NEGATE */
341
/* In-place two's-complement negation: *a = -*a. */
void NEG_LL(LONGLONGCH *a)
{
	/* Original MSVC inline assembly (invert both words, then add 1 with
	   carry), kept for reference:
	_asm
	{
		mov esi,a
		not dword ptr[esi]
		not dword ptr[esi+4]
		add dword ptr[esi],1
		adc dword ptr[esi+4],0
	}
	*/
#if defined(ASM386)
	/* NOTE(review): dead branch -- ASM386 is #undef'd above. */
	__asm__("notl 0(%%esi) \n\t"
		"notl 4(%%esi) \n\t"
		"addl $1, 0(%%esi) \n\t"
		"adcl $0, 4(%%esi) \n\t"
		:
		: "S" (a)
		: "memory", "cc"
		);
#else
	/* Portable path: widen, negate, split back. */
	__int64 aa = ConvertToLongLong(a);

	aa = -aa;

	ConvertFromLongLong(a, &aa);
#endif
}
371
372 /* ASR */
373
/* Arithmetic shift right of *a by 'shift' bits, in place.
   A shift count <= 0 leaves *a untouched, matching the original asm,
   which skipped its loop on jle. */
void ASR_LL(LONGLONGCH *a, int shift)
{
	/* Original MSVC inline assembly, kept for reference:
	_asm
	{
		mov esi,a
		mov eax,shift
		and eax,eax
		jle asrdn
		asrlp:
		sar dword ptr[esi+4],1
		rcr dword ptr[esi],1
		dec eax
		jne asrlp
		asrdn:
	}
	*/
#if defined(ASM386)
	/* Fixes to the previous GNU port (dead branch -- ASM386 is #undef'd
	   above -- but broken as written):
	   - %0 was an uninitialized dummy output while the code used it as
	     the loop counter; the shift count is now tied to it with a "0"
	     matching constraint.
	   - local-label jumps need direction suffixes ("jle 0f", "jne 1b");
	     plain "jle 0" is a jump to absolute address 0. */
	int count;
	__asm__ volatile
		("andl %0, %0 \n\t"
		 "jle 0f \n"            /* asrdn */
		 "1: \n\t"              /* asrlp */
		 "sarl $1, 4(%%esi) \n\t"
		 "rcrl $1, 0(%%esi) \n\t"
		 "decl %0 \n\t"
		 "jne 1b \n"
		 "0: \n\t"              /* asrdn */
		 : "=&r" (count)
		 : "S" (a), "0" (shift)
		 : "memory", "cc"
		);
#else
	if (shift > 0)
	{
		__int64 aa = ConvertToLongLong(a);

		/* C shifts by >= 64 are undefined behaviour; an arithmetic
		   shift by 63 already yields the limit value (0 or -1), the
		   same result the asm loop converges to, so clamp there.
		   (shift <= 0, previously UB here too, is now a no-op.) */
		aa >>= (shift > 63) ? 63 : shift;

		ConvertFromLongLong(a, &aa);
	}
#endif
}
414
415 /* Convert int to LONGLONGCH */
416
/* Sign-extend the 32-bit int *b into the 64-bit *a. */
void IntToLL(LONGLONGCH *a, int *b)
{
	/* Original MSVC inline assembly (cdq sign-extends eax into edx),
	   kept for reference:
	_asm
	{
		mov esi,b
		mov edi,a
		mov eax,[esi]
		cdq
		mov [edi],eax
		mov [edi+4],edx
	}
	*/
#if defined(ASM386)
	/* NOTE(review): dead branch -- ASM386 is #undef'd above. */
	__asm__("movl 0(%%esi), %%eax \n\t"
		"cdq \n\t"
		"movl %%eax, 0(%%edi) \n\t"
		"movl %%edx, 4(%%edi) \n\t"
		:
		: "S" (b), "D" (a)
		: "%eax", "%edx", "memory", "cc"
		);
#else
	/* Portable path: the cast sign-extends, the helper splits. */
	__int64 aa = (__int64) *b;

	ConvertFromLongLong(a, &aa);
#endif
}
445
446 //
447 // Fixed Point Multiply - MUL_FIXED
448 // See mathline.h
449 //
450
451 /*
452
453 Fixed Point Divide - returns a / b
454
455 */
456
/* 16.16 fixed-point divide: returns (a << 16) / b.
   b == 0 is reported and returns 0 (legacy debug behaviour, see TODO). */
int DIV_FIXED(int a, int b)
{
	if (b == 0) printf("DEBUG THIS: a = %d, b = %d\n", a, b);

	if (b == 0) return 0; /* TODO: debug this! (start with alien on ferarco) */
	/* Original MSVC inline assembly (builds the 48.16 dividend in
	   edx:eax before the narrowing idiv), kept for reference:
	int retval;
	_asm
	{
		mov eax,a
		cdq
		rol eax,16
		mov dx,ax
		xor ax,ax
		idiv b
		mov retval,eax
	}
	*/
#if defined(ASM386)
	/* NOTE(review): dead branch -- ASM386 is #undef'd above. */
	int retval;
	__asm__("cdq \n\t"
		"roll $16, %%eax \n\t"
		"mov %%ax, %%dx \n\t"
		"xor %%ax, %%ax \n\t"
		"idivl %2 \n\t"
		: "=a" (retval)
		: "0" (a), "m" (b)
		: "%edx", "cc"
		);
	return retval;
#else
	{
		/* 'long long' replaces the non-standard MSVC __int64, and the
		   multiply by 65536 replaces the previous 'aa << 16', which is
		   undefined behaviour in C when aa is negative. */
		long long aa = (long long) a;
		long long bb = (long long) b;
		long long cc = (aa * 65536) / bb;

		return (int) (cc & 0xffffffff);
	}
#endif
}
497
498 /*
499
500 Multiply and Divide Functions.
501
502 */
503
504 /*
505
506 A Narrowing 64/32 Division
507
508 */
509
/* Narrowing 64/32 division: returns (int)(*a / b).
   NOTE(review): b == 0 and quotients outside 32-bit range are not guarded
   here; the asm version would raise #DE in those cases -- confirm callers
   never pass them. */
int NarrowDivide(LONGLONGCH *a, int b)
{
	/* Original MSVC inline assembly, kept for reference:
	int retval;
	_asm
	{
		mov esi,a
		mov eax,[esi]
		mov edx,[esi+4]
		idiv b
		mov retval,eax
	}
	*/
#if defined(ASM386)
	/* NOTE(review): dead branch -- ASM386 is #undef'd above. */
	int retval;
	__asm__("movl 0(%%esi), %%eax \n\t"
		"movl 4(%%esi), %%edx \n\t"
		"idivl %2 \n\t"
		: "=a" (retval)
		: "S" (a), "m" (b)
		: "%edx", "cc"
		);
	return retval;
#else
	/* Portable path: divide in 64 bits, truncate the quotient to 32. */
	__int64 aa = ConvertToLongLong(a);
	__int64 bb = (__int64) b;

	__int64 cc = aa / bb;

	return (int) (cc & 0xffffffff);
#endif
}
542
543 /*
544
545 This function performs a Widening Multiply followed by a Narrowing Divide.
546
547 a = (a * b) / c
548
549 */
550
/* Widening multiply followed by a narrowing divide: returns (a * b) / c.
   The 64-bit intermediate product cannot overflow.
   NOTE(review): c == 0 is not guarded -- the asm version would raise #DE;
   confirm callers never pass it. */
int WideMulNarrowDiv(int a, int b, int c)
{
	/* Original MSVC inline assembly, kept for reference:
	int retval;
	_asm
	{
		mov eax,a
		imul b
		idiv c
		mov retval,eax
	}
	*/
#if defined(ASM386)
	/* NOTE(review): dead branch -- ASM386 is #undef'd above. */
	int retval;
	__asm__("imull %2 \n\t"
		"idivl %3 \n\t"
		: "=a" (retval)
		: "0" (a), "m" (b), "m" (c)
		: "%edx", "cc"
		);
	return retval;
#else
	/* 'long long' replaces the non-standard MSVC __int64; the product of
	   two 32-bit ints always fits in 64 bits. */
	long long aa = (long long) a;
	long long bb = (long long) b;
	long long cc = (long long) c;

	long long dd = (aa * bb) / cc;

	return (int) (dd & 0xffffffff);
#endif
}
582
583 /*
584
585 Square Root
586
587 Returns the Square Root of a 32-bit number
588
589 */
590
SqRoot32(int A)591 int SqRoot32(int A)
592 {
593 /*
594 _asm
595 {
596 finit
597 fild A
598 fsqrt
599 fistp temp2
600 fwait
601 }
602 */
603
604 #if defined(ASM386)
605 static volatile int sqrt_temp;
606 __asm__ volatile
607 ("finit \n\t"
608 "fildl %0 \n\t"
609 "fsqrt \n\t"
610 "fistpl sqrt_temp \n\t"
611 "fwait \n\t"
612 :
613 : "m" (A)
614 : "memory", "cc"
615 );
616
617 return sqrt_temp;
618 #else
619 float fA = A;
620 return lrintf(sqrtf(fA));
621 #endif
622 }
623