1 /******************************************************************************\
2 * Project: MSP Simulation Layer for Vector Unit Computational Divides *
3 * Authors: Iconoclast *
4 * Release: 2018.11.27 *
5 * License: CC0 Public Domain Dedication *
6 * *
7 * To the extent possible under law, the author(s) have dedicated all copyright *
8 * and related and neighboring rights to this software to the public domain *
9 * worldwide. This software is distributed without any warranty. *
10 * *
11 * You should have received a copy of the CC0 Public Domain Dedication along *
12 * with this software. *
13 * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. *
14 \******************************************************************************/
15
16 #include "divide.h"
17
18 static s32 DivIn = 0; /* buffered numerator of division read from vector file */
19 static s32 DivOut = 0; /* global division result set by VRCP/VRCPL/VRSQ/VRSQL */
20 #if (0 != 0)
21 static s32 MovIn; /* We do not emulate this register (obsolete, for VMOV). */
22 #endif
23
24 /*
25 * Boolean flag: Double-precision high was the last vector divide op?
26 *
27 * if (lastDivideOp == VRCP, VRCPL, VRSQ, VRSQL)
28 * DPH = false; // single-precision or double-precision low, not high
29 * else if (lastDivideOp == VRCPH, VRSQH)
30 * DPH = true; // double-precision high
31 * else if (lastDivideOp == VMOV, VNOP)
32 * DPH = DPH; // no change--divide-group ops but not real divides
33 */
34 static int DPH = 0;
35
36 /*
37 * 11-bit vector divide result look-up table
38 * Thanks to MAME / MESS for organizing.
39 */
40 static const u16 div_ROM[1 << 10] = {
41 0xFFFFu,
42 0xFF00u,
43 0xFE01u,
44 0xFD04u,
45 0xFC07u,
46 0xFB0Cu,
47 0xFA11u,
48 0xF918u,
49 0xF81Fu,
50 0xF727u,
51 0xF631u,
52 0xF53Bu,
53 0xF446u,
54 0xF352u,
55 0xF25Fu,
56 0xF16Du,
57 0xF07Cu,
58 0xEF8Bu,
59 0xEE9Cu,
60 0xEDAEu,
61 0xECC0u,
62 0xEBD3u,
63 0xEAE8u,
64 0xE9FDu,
65 0xE913u,
66 0xE829u,
67 0xE741u,
68 0xE65Au,
69 0xE573u,
70 0xE48Du,
71 0xE3A9u,
72 0xE2C5u,
73 0xE1E1u,
74 0xE0FFu,
75 0xE01Eu,
76 0xDF3Du,
77 0xDE5Du,
78 0xDD7Eu,
79 0xDCA0u,
80 0xDBC2u,
81 0xDAE6u,
82 0xDA0Au,
83 0xD92Fu,
84 0xD854u,
85 0xD77Bu,
86 0xD6A2u,
87 0xD5CAu,
88 0xD4F3u,
89 0xD41Du,
90 0xD347u,
91 0xD272u,
92 0xD19Eu,
93 0xD0CBu,
94 0xCFF8u,
95 0xCF26u,
96 0xCE55u,
97 0xCD85u,
98 0xCCB5u,
99 0xCBE6u,
100 0xCB18u,
101 0xCA4Bu,
102 0xC97Eu,
103 0xC8B2u,
104 0xC7E7u,
105 0xC71Cu,
106 0xC652u,
107 0xC589u,
108 0xC4C0u,
109 0xC3F8u,
110 0xC331u,
111 0xC26Bu,
112 0xC1A5u,
113 0xC0E0u,
114 0xC01Cu,
115 0xBF58u,
116 0xBE95u,
117 0xBDD2u,
118 0xBD10u,
119 0xBC4Fu,
120 0xBB8Fu,
121 0xBACFu,
122 0xBA10u,
123 0xB951u,
124 0xB894u,
125 0xB7D6u,
126 0xB71Au,
127 0xB65Eu,
128 0xB5A2u,
129 0xB4E8u,
130 0xB42Eu,
131 0xB374u,
132 0xB2BBu,
133 0xB203u,
134 0xB14Bu,
135 0xB094u,
136 0xAFDEu,
137 0xAF28u,
138 0xAE73u,
139 0xADBEu,
140 0xAD0Au,
141 0xAC57u,
142 0xABA4u,
143 0xAAF1u,
144 0xAA40u,
145 0xA98Eu,
146 0xA8DEu,
147 0xA82Eu,
148 0xA77Eu,
149 0xA6D0u,
150 0xA621u,
151 0xA574u,
152 0xA4C6u,
153 0xA41Au,
154 0xA36Eu,
155 0xA2C2u,
156 0xA217u,
157 0xA16Du,
158 0xA0C3u,
159 0xA01Au,
160 0x9F71u,
161 0x9EC8u,
162 0x9E21u,
163 0x9D79u,
164 0x9CD3u,
165 0x9C2Du,
166 0x9B87u,
167 0x9AE2u,
168 0x9A3Du,
169 0x9999u,
170 0x98F6u,
171 0x9852u,
172 0x97B0u,
173 0x970Eu,
174 0x966Cu,
175 0x95CBu,
176 0x952Bu,
177 0x948Bu,
178 0x93EBu,
179 0x934Cu,
180 0x92ADu,
181 0x920Fu,
182 0x9172u,
183 0x90D4u,
184 0x9038u,
185 0x8F9Cu,
186 0x8F00u,
187 0x8E65u,
188 0x8DCAu,
189 0x8D30u,
190 0x8C96u,
191 0x8BFCu,
192 0x8B64u,
193 0x8ACBu,
194 0x8A33u,
195 0x899Cu,
196 0x8904u,
197 0x886Eu,
198 0x87D8u,
199 0x8742u,
200 0x86ADu,
201 0x8618u,
202 0x8583u,
203 0x84F0u,
204 0x845Cu,
205 0x83C9u,
206 0x8336u,
207 0x82A4u,
208 0x8212u,
209 0x8181u,
210 0x80F0u,
211 0x8060u,
212 0x7FD0u,
213 0x7F40u,
214 0x7EB1u,
215 0x7E22u,
216 0x7D93u,
217 0x7D05u,
218 0x7C78u,
219 0x7BEBu,
220 0x7B5Eu,
221 0x7AD2u,
222 0x7A46u,
223 0x79BAu,
224 0x792Fu,
225 0x78A4u,
226 0x781Au,
227 0x7790u,
228 0x7706u,
229 0x767Du,
230 0x75F5u,
231 0x756Cu,
232 0x74E4u,
233 0x745Du,
234 0x73D5u,
235 0x734Fu,
236 0x72C8u,
237 0x7242u,
238 0x71BCu,
239 0x7137u,
240 0x70B2u,
241 0x702Eu,
242 0x6FA9u,
243 0x6F26u,
244 0x6EA2u,
245 0x6E1Fu,
246 0x6D9Cu,
247 0x6D1Au,
248 0x6C98u,
249 0x6C16u,
250 0x6B95u,
251 0x6B14u,
252 0x6A94u,
253 0x6A13u,
254 0x6993u,
255 0x6914u,
256 0x6895u,
257 0x6816u,
258 0x6798u,
259 0x6719u,
260 0x669Cu,
261 0x661Eu,
262 0x65A1u,
263 0x6524u,
264 0x64A8u,
265 0x642Cu,
266 0x63B0u,
267 0x6335u,
268 0x62BAu,
269 0x623Fu,
270 0x61C5u,
271 0x614Bu,
272 0x60D1u,
273 0x6058u,
274 0x5FDFu,
275 0x5F66u,
276 0x5EEDu,
277 0x5E75u,
278 0x5DFDu,
279 0x5D86u,
280 0x5D0Fu,
281 0x5C98u,
282 0x5C22u,
283 0x5BABu,
284 0x5B35u,
285 0x5AC0u,
286 0x5A4Bu,
287 0x59D6u,
288 0x5961u,
289 0x58EDu,
290 0x5879u,
291 0x5805u,
292 0x5791u,
293 0x571Eu,
294 0x56ACu,
295 0x5639u,
296 0x55C7u,
297 0x5555u,
298 0x54E3u,
299 0x5472u,
300 0x5401u,
301 0x5390u,
302 0x5320u,
303 0x52AFu,
304 0x5240u,
305 0x51D0u,
306 0x5161u,
307 0x50F2u,
308 0x5083u,
309 0x5015u,
310 0x4FA6u,
311 0x4F38u,
312 0x4ECBu,
313 0x4E5Eu,
314 0x4DF1u,
315 0x4D84u,
316 0x4D17u,
317 0x4CABu,
318 0x4C3Fu,
319 0x4BD3u,
320 0x4B68u,
321 0x4AFDu,
322 0x4A92u,
323 0x4A27u,
324 0x49BDu,
325 0x4953u,
326 0x48E9u,
327 0x4880u,
328 0x4817u,
329 0x47AEu,
330 0x4745u,
331 0x46DCu,
332 0x4674u,
333 0x460Cu,
334 0x45A5u,
335 0x453Du,
336 0x44D6u,
337 0x446Fu,
338 0x4408u,
339 0x43A2u,
340 0x433Cu,
341 0x42D6u,
342 0x4270u,
343 0x420Bu,
344 0x41A6u,
345 0x4141u,
346 0x40DCu,
347 0x4078u,
348 0x4014u,
349 0x3FB0u,
350 0x3F4Cu,
351 0x3EE8u,
352 0x3E85u,
353 0x3E22u,
354 0x3DC0u,
355 0x3D5Du,
356 0x3CFBu,
357 0x3C99u,
358 0x3C37u,
359 0x3BD6u,
360 0x3B74u,
361 0x3B13u,
362 0x3AB2u,
363 0x3A52u,
364 0x39F1u,
365 0x3991u,
366 0x3931u,
367 0x38D2u,
368 0x3872u,
369 0x3813u,
370 0x37B4u,
371 0x3755u,
372 0x36F7u,
373 0x3698u,
374 0x363Au,
375 0x35DCu,
376 0x357Fu,
377 0x3521u,
378 0x34C4u,
379 0x3467u,
380 0x340Au,
381 0x33AEu,
382 0x3351u,
383 0x32F5u,
384 0x3299u,
385 0x323Eu,
386 0x31E2u,
387 0x3187u,
388 0x312Cu,
389 0x30D1u,
390 0x3076u,
391 0x301Cu,
392 0x2FC2u,
393 0x2F68u,
394 0x2F0Eu,
395 0x2EB4u,
396 0x2E5Bu,
397 0x2E02u,
398 0x2DA9u,
399 0x2D50u,
400 0x2CF8u,
401 0x2C9Fu,
402 0x2C47u,
403 0x2BEFu,
404 0x2B97u,
405 0x2B40u,
406 0x2AE8u,
407 0x2A91u,
408 0x2A3Au,
409 0x29E4u,
410 0x298Du,
411 0x2937u,
412 0x28E0u,
413 0x288Bu,
414 0x2835u,
415 0x27DFu,
416 0x278Au,
417 0x2735u,
418 0x26E0u,
419 0x268Bu,
420 0x2636u,
421 0x25E2u,
422 0x258Du,
423 0x2539u,
424 0x24E5u,
425 0x2492u,
426 0x243Eu,
427 0x23EBu,
428 0x2398u,
429 0x2345u,
430 0x22F2u,
431 0x22A0u,
432 0x224Du,
433 0x21FBu,
434 0x21A9u,
435 0x2157u,
436 0x2105u,
437 0x20B4u,
438 0x2063u,
439 0x2012u,
440 0x1FC1u,
441 0x1F70u,
442 0x1F1Fu,
443 0x1ECFu,
444 0x1E7Fu,
445 0x1E2Eu,
446 0x1DDFu,
447 0x1D8Fu,
448 0x1D3Fu,
449 0x1CF0u,
450 0x1CA1u,
451 0x1C52u,
452 0x1C03u,
453 0x1BB4u,
454 0x1B66u,
455 0x1B17u,
456 0x1AC9u,
457 0x1A7Bu,
458 0x1A2Du,
459 0x19E0u,
460 0x1992u,
461 0x1945u,
462 0x18F8u,
463 0x18ABu,
464 0x185Eu,
465 0x1811u,
466 0x17C4u,
467 0x1778u,
468 0x172Cu,
469 0x16E0u,
470 0x1694u,
471 0x1648u,
472 0x15FDu,
473 0x15B1u,
474 0x1566u,
475 0x151Bu,
476 0x14D0u,
477 0x1485u,
478 0x143Bu,
479 0x13F0u,
480 0x13A6u,
481 0x135Cu,
482 0x1312u,
483 0x12C8u,
484 0x127Fu,
485 0x1235u,
486 0x11ECu,
487 0x11A3u,
488 0x1159u,
489 0x1111u,
490 0x10C8u,
491 0x107Fu,
492 0x1037u,
493 0x0FEFu,
494 0x0FA6u,
495 0x0F5Eu,
496 0x0F17u,
497 0x0ECFu,
498 0x0E87u,
499 0x0E40u,
500 0x0DF9u,
501 0x0DB2u,
502 0x0D6Bu,
503 0x0D24u,
504 0x0CDDu,
505 0x0C97u,
506 0x0C50u,
507 0x0C0Au,
508 0x0BC4u,
509 0x0B7Eu,
510 0x0B38u,
511 0x0AF2u,
512 0x0AADu,
513 0x0A68u,
514 0x0A22u,
515 0x09DDu,
516 0x0998u,
517 0x0953u,
518 0x090Fu,
519 0x08CAu,
520 0x0886u,
521 0x0842u,
522 0x07FDu,
523 0x07B9u,
524 0x0776u,
525 0x0732u,
526 0x06EEu,
527 0x06ABu,
528 0x0668u,
529 0x0624u,
530 0x05E1u,
531 0x059Eu,
532 0x055Cu,
533 0x0519u,
534 0x04D6u,
535 0x0494u,
536 0x0452u,
537 0x0410u,
538 0x03CEu,
539 0x038Cu,
540 0x034Au,
541 0x0309u,
542 0x02C7u,
543 0x0286u,
544 0x0245u,
545 0x0204u,
546 0x01C3u,
547 0x0182u,
548 0x0141u,
549 0x0101u,
550 0x00C0u,
551 0x0080u,
552 0x0040u,
553 0x6A09u,
554 0xFFFFu,
555 0x6955u,
556 0xFF00u,
557 0x68A1u,
558 0xFE02u,
559 0x67EFu,
560 0xFD06u,
561 0x673Eu,
562 0xFC0Bu,
563 0x668Du,
564 0xFB12u,
565 0x65DEu,
566 0xFA1Au,
567 0x6530u,
568 0xF923u,
569 0x6482u,
570 0xF82Eu,
571 0x63D6u,
572 0xF73Bu,
573 0x632Bu,
574 0xF648u,
575 0x6280u,
576 0xF557u,
577 0x61D7u,
578 0xF467u,
579 0x612Eu,
580 0xF379u,
581 0x6087u,
582 0xF28Cu,
583 0x5FE0u,
584 0xF1A0u,
585 0x5F3Au,
586 0xF0B6u,
587 0x5E95u,
588 0xEFCDu,
589 0x5DF1u,
590 0xEEE5u,
591 0x5D4Eu,
592 0xEDFFu,
593 0x5CACu,
594 0xED19u,
595 0x5C0Bu,
596 0xEC35u,
597 0x5B6Bu,
598 0xEB52u,
599 0x5ACBu,
600 0xEA71u,
601 0x5A2Cu,
602 0xE990u,
603 0x598Fu,
604 0xE8B1u,
605 0x58F2u,
606 0xE7D3u,
607 0x5855u,
608 0xE6F6u,
609 0x57BAu,
610 0xE61Bu,
611 0x5720u,
612 0xE540u,
613 0x5686u,
614 0xE467u,
615 0x55EDu,
616 0xE38Eu,
617 0x5555u,
618 0xE2B7u,
619 0x54BEu,
620 0xE1E1u,
621 0x5427u,
622 0xE10Du,
623 0x5391u,
624 0xE039u,
625 0x52FCu,
626 0xDF66u,
627 0x5268u,
628 0xDE94u,
629 0x51D5u,
630 0xDDC4u,
631 0x5142u,
632 0xDCF4u,
633 0x50B0u,
634 0xDC26u,
635 0x501Fu,
636 0xDB59u,
637 0x4F8Eu,
638 0xDA8Cu,
639 0x4EFEu,
640 0xD9C1u,
641 0x4E6Fu,
642 0xD8F7u,
643 0x4DE1u,
644 0xD82Du,
645 0x4D53u,
646 0xD765u,
647 0x4CC6u,
648 0xD69Eu,
649 0x4C3Au,
650 0xD5D7u,
651 0x4BAFu,
652 0xD512u,
653 0x4B24u,
654 0xD44Eu,
655 0x4A9Au,
656 0xD38Au,
657 0x4A10u,
658 0xD2C8u,
659 0x4987u,
660 0xD206u,
661 0x48FFu,
662 0xD146u,
663 0x4878u,
664 0xD086u,
665 0x47F1u,
666 0xCFC7u,
667 0x476Bu,
668 0xCF0Au,
669 0x46E5u,
670 0xCE4Du,
671 0x4660u,
672 0xCD91u,
673 0x45DCu,
674 0xCCD6u,
675 0x4558u,
676 0xCC1Bu,
677 0x44D5u,
678 0xCB62u,
679 0x4453u,
680 0xCAA9u,
681 0x43D1u,
682 0xC9F2u,
683 0x434Fu,
684 0xC93Bu,
685 0x42CFu,
686 0xC885u,
687 0x424Fu,
688 0xC7D0u,
689 0x41CFu,
690 0xC71Cu,
691 0x4151u,
692 0xC669u,
693 0x40D2u,
694 0xC5B6u,
695 0x4055u,
696 0xC504u,
697 0x3FD8u,
698 0xC453u,
699 0x3F5Bu,
700 0xC3A3u,
701 0x3EDFu,
702 0xC2F4u,
703 0x3E64u,
704 0xC245u,
705 0x3DE9u,
706 0xC198u,
707 0x3D6Eu,
708 0xC0EBu,
709 0x3CF5u,
710 0xC03Fu,
711 0x3C7Cu,
712 0xBF93u,
713 0x3C03u,
714 0xBEE9u,
715 0x3B8Bu,
716 0xBE3Fu,
717 0x3B13u,
718 0xBD96u,
719 0x3A9Cu,
720 0xBCEDu,
721 0x3A26u,
722 0xBC46u,
723 0x39B0u,
724 0xBB9Fu,
725 0x393Au,
726 0xBAF8u,
727 0x38C5u,
728 0xBA53u,
729 0x3851u,
730 0xB9AEu,
731 0x37DDu,
732 0xB90Au,
733 0x3769u,
734 0xB867u,
735 0x36F6u,
736 0xB7C5u,
737 0x3684u,
738 0xB723u,
739 0x3612u,
740 0xB681u,
741 0x35A0u,
742 0xB5E1u,
743 0x352Fu,
744 0xB541u,
745 0x34BFu,
746 0xB4A2u,
747 0x344Fu,
748 0xB404u,
749 0x33DFu,
750 0xB366u,
751 0x3370u,
752 0xB2C9u,
753 0x3302u,
754 0xB22Cu,
755 0x3293u,
756 0xB191u,
757 0x3226u,
758 0xB0F5u,
759 0x31B9u,
760 0xB05Bu,
761 0x314Cu,
762 0xAFC1u,
763 0x30DFu,
764 0xAF28u,
765 0x3074u,
766 0xAE8Fu,
767 0x3008u,
768 0xADF7u,
769 0x2F9Du,
770 0xAD60u,
771 0x2F33u,
772 0xACC9u,
773 0x2EC8u,
774 0xAC33u,
775 0x2E5Fu,
776 0xAB9Eu,
777 0x2DF6u,
778 0xAB09u,
779 0x2D8Du,
780 0xAA75u,
781 0x2D24u,
782 0xA9E1u,
783 0x2CBCu,
784 0xA94Eu,
785 0x2C55u,
786 0xA8BCu,
787 0x2BEEu,
788 0xA82Au,
789 0x2B87u,
790 0xA799u,
791 0x2B21u,
792 0xA708u,
793 0x2ABBu,
794 0xA678u,
795 0x2A55u,
796 0xA5E8u,
797 0x29F0u,
798 0xA559u,
799 0x298Bu,
800 0xA4CBu,
801 0x2927u,
802 0xA43Du,
803 0x28C3u,
804 0xA3B0u,
805 0x2860u,
806 0xA323u,
807 0x27FDu,
808 0xA297u,
809 0x279Au,
810 0xA20Bu,
811 0x2738u,
812 0xA180u,
813 0x26D6u,
814 0xA0F6u,
815 0x2674u,
816 0xA06Cu,
817 0x2613u,
818 0x9FE2u,
819 0x25B2u,
820 0x9F59u,
821 0x2552u,
822 0x9ED1u,
823 0x24F2u,
824 0x9E49u,
825 0x2492u,
826 0x9DC2u,
827 0x2432u,
828 0x9D3Bu,
829 0x23D3u,
830 0x9CB4u,
831 0x2375u,
832 0x9C2Fu,
833 0x2317u,
834 0x9BA9u,
835 0x22B9u,
836 0x9B25u,
837 0x225Bu,
838 0x9AA0u,
839 0x21FEu,
840 0x9A1Cu,
841 0x21A1u,
842 0x9999u,
843 0x2145u,
844 0x9916u,
845 0x20E8u,
846 0x9894u,
847 0x208Du,
848 0x9812u,
849 0x2031u,
850 0x9791u,
851 0x1FD6u,
852 0x9710u,
853 0x1F7Bu,
854 0x968Fu,
855 0x1F21u,
856 0x960Fu,
857 0x1EC7u,
858 0x9590u,
859 0x1E6Du,
860 0x9511u,
861 0x1E13u,
862 0x9492u,
863 0x1DBAu,
864 0x9414u,
865 0x1D61u,
866 0x9397u,
867 0x1D09u,
868 0x931Au,
869 0x1CB1u,
870 0x929Du,
871 0x1C59u,
872 0x9221u,
873 0x1C01u,
874 0x91A5u,
875 0x1BAAu,
876 0x9129u,
877 0x1B53u,
878 0x90AFu,
879 0x1AFCu,
880 0x9034u,
881 0x1AA6u,
882 0x8FBAu,
883 0x1A50u,
884 0x8F40u,
885 0x19FAu,
886 0x8EC7u,
887 0x19A5u,
888 0x8E4Fu,
889 0x1950u,
890 0x8DD6u,
891 0x18FBu,
892 0x8D5Eu,
893 0x18A7u,
894 0x8CE7u,
895 0x1853u,
896 0x8C70u,
897 0x17FFu,
898 0x8BF9u,
899 0x17ABu,
900 0x8B83u,
901 0x1758u,
902 0x8B0Du,
903 0x1705u,
904 0x8A98u,
905 0x16B2u,
906 0x8A23u,
907 0x1660u,
908 0x89AEu,
909 0x160Du,
910 0x893Au,
911 0x15BCu,
912 0x88C6u,
913 0x156Au,
914 0x8853u,
915 0x1519u,
916 0x87E0u,
917 0x14C8u,
918 0x876Du,
919 0x1477u,
920 0x86FBu,
921 0x1426u,
922 0x8689u,
923 0x13D6u,
924 0x8618u,
925 0x1386u,
926 0x85A7u,
927 0x1337u,
928 0x8536u,
929 0x12E7u,
930 0x84C6u,
931 0x1298u,
932 0x8456u,
933 0x1249u,
934 0x83E7u,
935 0x11FBu,
936 0x8377u,
937 0x11ACu,
938 0x8309u,
939 0x115Eu,
940 0x829Au,
941 0x1111u,
942 0x822Cu,
943 0x10C3u,
944 0x81BFu,
945 0x1076u,
946 0x8151u,
947 0x1029u,
948 0x80E4u,
949 0x0FDCu,
950 0x8078u,
951 0x0F8Fu,
952 0x800Cu,
953 0x0F43u,
954 0x7FA0u,
955 0x0EF7u,
956 0x7F34u,
957 0x0EABu,
958 0x7EC9u,
959 0x0E60u,
960 0x7E5Eu,
961 0x0E15u,
962 0x7DF4u,
963 0x0DCAu,
964 0x7D8Au,
965 0x0D7Fu,
966 0x7D20u,
967 0x0D34u,
968 0x7CB6u,
969 0x0CEAu,
970 0x7C4Du,
971 0x0CA0u,
972 0x7BE5u,
973 0x0C56u,
974 0x7B7Cu,
975 0x0C0Cu,
976 0x7B14u,
977 0x0BC3u,
978 0x7AACu,
979 0x0B7Au,
980 0x7A45u,
981 0x0B31u,
982 0x79DEu,
983 0x0AE8u,
984 0x7977u,
985 0x0AA0u,
986 0x7911u,
987 0x0A58u,
988 0x78ABu,
989 0x0A10u,
990 0x7845u,
991 0x09C8u,
992 0x77DFu,
993 0x0981u,
994 0x777Au,
995 0x0939u,
996 0x7715u,
997 0x08F2u,
998 0x76B1u,
999 0x08ABu,
1000 0x764Du,
1001 0x0865u,
1002 0x75E9u,
1003 0x081Eu,
1004 0x7585u,
1005 0x07D8u,
1006 0x7522u,
1007 0x0792u,
1008 0x74BFu,
1009 0x074Du,
1010 0x745Du,
1011 0x0707u,
1012 0x73FAu,
1013 0x06C2u,
1014 0x7398u,
1015 0x067Du,
1016 0x7337u,
1017 0x0638u,
1018 0x72D5u,
1019 0x05F3u,
1020 0x7274u,
1021 0x05AFu,
1022 0x7213u,
1023 0x056Au,
1024 0x71B3u,
1025 0x0526u,
1026 0x7152u,
1027 0x04E2u,
1028 0x70F2u,
1029 0x049Fu,
1030 0x7093u,
1031 0x045Bu,
1032 0x7033u,
1033 0x0418u,
1034 0x6FD4u,
1035 0x03D5u,
1036 0x6F76u,
1037 0x0392u,
1038 0x6F17u,
1039 0x0350u,
1040 0x6EB9u,
1041 0x030Du,
1042 0x6E5Bu,
1043 0x02CBu,
1044 0x6DFDu,
1045 0x0289u,
1046 0x6DA0u,
1047 0x0247u,
1048 0x6D43u,
1049 0x0206u,
1050 0x6CE6u,
1051 0x01C4u,
1052 0x6C8Au,
1053 0x0183u,
1054 0x6C2Du,
1055 0x0142u,
1056 0x6BD1u,
1057 0x0101u,
1058 0x6B76u,
1059 0x00C0u,
1060 0x6B1Au,
1061 0x0080u,
1062 0x6ABFu,
1063 0x0040u,
1064 0x6A64u,
1065 };
1066
/* Values for the `sqrt` argument of do_div(). */
enum {
    SP_DIV_SQRT_NO  = 0,
    SP_DIV_SQRT_YES = 1
};

/*
 * Values for the `precision` argument of do_div(); the divide operations
 * also store them in DPH.  A third selector, SP_DIV_PRECISION_CURRENT, was
 * sketched by the author but is not defined.
 */
enum {
    SP_DIV_PRECISION_SINGLE = 0,
    SP_DIV_PRECISION_DOUBLE = ~0
};
1076
do_div(i32 data,int sqrt,int precision)1077 NOINLINE static void do_div(i32 data, int sqrt, int precision)
1078 {
1079 i32 addr;
1080 int fetch;
1081 int shift;
1082
1083 #if ((~0 >> 1 == -1) && (0))
1084 data ^= (s32)(((s64)data + 32768) >> 63); /* DP only: (data < -32768) */
1085 fetch = (s32)(((s32)data + 0) >> 31);
1086 data ^= fetch;
1087 data -= fetch; /* two's complement: -x == ~x - (~0) on wrap-around */
1088 #else
1089 if (precision == SP_DIV_PRECISION_SINGLE)
1090 data = (data < 0) ? -data : +data;
1091 if (precision == SP_DIV_PRECISION_DOUBLE && data < 0)
1092 data = (data >= -32768) ? -data : ~data;
1093 #endif
1094
1095 /*
1096 * Note, from the code just above, that data cannot be negative.
1097 * (data >= 0) is unconditionally forced by the above algorithm.
1098 */
1099 addr = data;
1100 if (data == 0x00000000) {
1101 shift = (precision == SP_DIV_PRECISION_SINGLE) ? 16 : 0;
1102 addr = addr << shift;
1103 } else {
1104 for (shift = 0; addr >= 0x00000000; addr <<= 1, shift++)
1105 ;
1106 }
1107 addr = (addr >> 22) & 0x000001FF;
1108
1109 if (sqrt == SP_DIV_SQRT_YES) {
1110 addr &= 0x000001FE;
1111 addr |= 0x00000200 | (shift & 1);
1112 }
1113 shift ^= 31; /* flipping shift direction from left- to right- */
1114 shift >>= (sqrt == SP_DIV_SQRT_YES);
1115 DivOut = (0x40000000UL | ((u32)div_ROM[addr] << 14)) >> shift;
1116 if (DivIn == 0) /* corner case: overflow via division by zero */
1117 DivOut = +0x7FFFFFFFl;
1118 else if (DivIn == -32768) /* corner case: signed underflow barrier */
1119 DivOut = -0x00010000l;
1120 else
1121 DivOut ^= (DivIn < 0) ? ~0 : 0;
1122 return;
1123 }
1124
VRCP(v16 vs,v16 vt)1125 VECTOR_OPERATION VRCP(v16 vs, v16 vt)
1126 {
1127 const int result = (inst_word & 0x000007FF) >> 6;
1128 const int source = (inst_word & 0x0000FFFF) >> 11;
1129 const int target = (inst_word >> 16) & 31;
1130 const unsigned int element = (inst_word >> 21) & 0x7;
1131
1132 DivIn = (i32)VR[target][element];
1133 do_div(DivIn, SP_DIV_SQRT_NO, SP_DIV_PRECISION_SINGLE);
1134 #ifdef ARCH_MIN_SSE2
1135 *(v16 *)VACC_L = vt;
1136 #else
1137 vector_copy(VACC_L, vt);
1138 #endif
1139 VR[result][source & 07] = (i16)DivOut;
1140 DPH = SP_DIV_PRECISION_SINGLE;
1141 #ifdef ARCH_MIN_SSE2
1142 COMPILER_FENCE();
1143 vs = *(v16 *)VR[result];
1144 return (vs);
1145 #else
1146 vector_copy(V_result, VR[result]);
1147 vs = vt; /* unused */
1148 return;
1149 #endif
1150 }
1151
VRCPL(v16 vs,v16 vt)1152 VECTOR_OPERATION VRCPL(v16 vs, v16 vt)
1153 {
1154 const int result = (inst_word & 0x000007FF) >> 6;
1155 const int source = (inst_word & 0x0000FFFF) >> 11;
1156 const int target = (inst_word >> 16) & 31;
1157 const unsigned int element = (inst_word >> 21) & 0x7;
1158
1159 if (DPH == SP_DIV_PRECISION_SINGLE)
1160 DivIn = (s32)(s16)(VR[target][element]);
1161 else
1162 DivIn |= (s32)(u16)(VR[target][element] & 0xFFFFu);
1163 do_div(DivIn, SP_DIV_SQRT_NO, DPH);
1164 #ifdef ARCH_MIN_SSE2
1165 *(v16 *)VACC_L = vt;
1166 #else
1167 vector_copy(VACC_L, vt);
1168 #endif
1169 VR[result][source & 07] = (i16)DivOut;
1170 DPH = SP_DIV_PRECISION_SINGLE;
1171 #ifdef ARCH_MIN_SSE2
1172 COMPILER_FENCE();
1173 vs = *(v16 *)VR[result];
1174 return (vs);
1175 #else
1176 vector_copy(V_result, VR[result]);
1177 vs = vt; /* unused */
1178 return;
1179 #endif
1180 }
1181
VRCPH(v16 vs,v16 vt)1182 VECTOR_OPERATION VRCPH(v16 vs, v16 vt)
1183 {
1184 const int result = (inst_word & 0x000007FF) >> 6;
1185 const int source = (inst_word & 0x0000FFFF) >> 11;
1186 const int target = (inst_word >> 16) & 31;
1187 const unsigned int element = (inst_word >> 21) & 0x7;
1188
1189 DivIn = VR[target][element] << 16;
1190 #ifdef ARCH_MIN_SSE2
1191 *(v16 *)VACC_L = vt;
1192 #else
1193 vector_copy(VACC_L, vt);
1194 #endif
1195 VR[result][source & 07] = DivOut >> 16;
1196 DPH = SP_DIV_PRECISION_DOUBLE;
1197 #ifdef ARCH_MIN_SSE2
1198 COMPILER_FENCE();
1199 vs = *(v16 *)VR[result];
1200 return (vs);
1201 #else
1202 vector_copy(V_result, VR[result]);
1203 vs = vt; /* unused */
1204 return;
1205 #endif
1206 }
1207
VMOV(v16 vs,v16 vt)1208 VECTOR_OPERATION VMOV(v16 vs, v16 vt)
1209 {
1210 const int result = (inst_word & 0x000007FF) >> 6;
1211 const int source = (inst_word & 0x0000FFFF) >> 11;
1212 const unsigned int element = (inst_word >> 21) & 0x7;
1213
1214 #ifdef ARCH_MIN_SSE2
1215 *(v16 *)VACC_L = vt;
1216 #else
1217 vector_copy(VACC_L, vt);
1218 #endif
1219 VR[result][source & 07] = VACC_L[element];
1220 #ifdef ARCH_MIN_SSE2
1221 COMPILER_FENCE();
1222 vs = *(v16 *)VR[result];
1223 return (vs);
1224 #else
1225 vector_copy(V_result, VR[result]);
1226 vs = vt; /* unused */
1227 return;
1228 #endif
1229 }
1230
VRSQ(v16 vs,v16 vt)1231 VECTOR_OPERATION VRSQ(v16 vs, v16 vt)
1232 {
1233 const int result = (inst_word & 0x000007FF) >> 6;
1234 const int source = (inst_word & 0x0000FFFF) >> 11;
1235 const int target = (inst_word >> 16) & 31;
1236 const unsigned int element = (inst_word >> 21) & 0x7;
1237
1238 DivIn = (i32)VR[target][element];
1239 do_div(DivIn, SP_DIV_SQRT_YES, SP_DIV_PRECISION_SINGLE);
1240 #ifdef ARCH_MIN_SSE2
1241 *(v16 *)VACC_L = vt;
1242 #else
1243 vector_copy(VACC_L, vt);
1244 #endif
1245 VR[result][source & 07] = (i16)DivOut;
1246 DPH = SP_DIV_PRECISION_SINGLE;
1247 #ifdef ARCH_MIN_SSE2
1248 COMPILER_FENCE();
1249 vs = *(v16 *)VR[result];
1250 return (vs);
1251 #else
1252 vector_copy(V_result, VR[result]);
1253 vs = vt; /* unused */
1254 return;
1255 #endif
1256 }
1257
VRSQL(v16 vs,v16 vt)1258 VECTOR_OPERATION VRSQL(v16 vs, v16 vt)
1259 {
1260 const int result = (inst_word & 0x000007FF) >> 6;
1261 const int source = (inst_word & 0x0000FFFF) >> 11;
1262 const int target = (inst_word >> 16) & 31;
1263 const unsigned int element = (inst_word >> 21) & 0x7;
1264
1265 if (DPH == SP_DIV_PRECISION_SINGLE)
1266 DivIn = (s32)(s16)(VR[target][element]);
1267 else
1268 DivIn |= (s32)(u16)(VR[target][element] & 0xFFFFu);
1269 do_div(DivIn, SP_DIV_SQRT_YES, DPH);
1270 #ifdef ARCH_MIN_SSE2
1271 *(v16 *)VACC_L = vt;
1272 #else
1273 vector_copy(VACC_L, vt);
1274 #endif
1275 VR[result][source & 07] = (i16)DivOut;
1276 DPH = SP_DIV_PRECISION_SINGLE;
1277 #ifdef ARCH_MIN_SSE2
1278 COMPILER_FENCE();
1279 vs = *(v16 *)VR[result];
1280 return (vs);
1281 #else
1282 vector_copy(V_result, VR[result]);
1283 vs = vt; /* unused */
1284 return;
1285 #endif
1286 }
1287
VRSQH(v16 vs,v16 vt)1288 VECTOR_OPERATION VRSQH(v16 vs, v16 vt)
1289 {
1290 const int result = (inst_word & 0x000007FF) >> 6;
1291 const int source = (inst_word & 0x0000FFFF) >> 11;
1292 const int target = (inst_word >> 16) & 31;
1293 const unsigned int element = (inst_word >> 21) & 0x7;
1294
1295 DivIn = VR[target][element] << 16;
1296 #ifdef ARCH_MIN_SSE2
1297 *(v16 *)VACC_L = vt;
1298 #else
1299 vector_copy(VACC_L, vt);
1300 #endif
1301 VR[result][source & 07] = DivOut >> 16;
1302 DPH = SP_DIV_PRECISION_DOUBLE;
1303 #ifdef ARCH_MIN_SSE2
1304 COMPILER_FENCE();
1305 vs = *(v16 *)VR[result];
1306 return (vs);
1307 #else
1308 vector_copy(V_result, VR[result]);
1309 vs = vt; /* unused */
1310 return;
1311 #endif
1312 }
1313
VNOP(v16 vs,v16 vt)1314 VECTOR_OPERATION VNOP(v16 vs, v16 vt)
1315 {
1316 const int result = (inst_word & 0x000007FF) >> 6;
1317
1318 #ifdef ARCH_MIN_SSE2
1319 vs = *(v16 *)VR[result];
1320 return (vt = vs); /* -Wunused-but-set-parameter */
1321 #else
1322 vector_copy(V_result, VR[result]);
1323 if (vt == vs)
1324 return; /* -Wunused-but-set-parameter */
1325 return;
1326 #endif
1327 }
1328