1 // license:BSD-3-Clause
2 // copyright-holders:Ville Linde, Barry Rodewald, Carl, Philip Bennett, Samuele Zannoli
3 // Pentium+ specific opcodes
4
5 extern flag float32_is_nan( float32 a ); // since its not defined in softfloat.h
6 extern flag float64_is_nan( float64 a ); // since its not defined in softfloat.h
7
MMXPROLOG()8 void i386_device::MMXPROLOG()
9 {
10 //m_x87_sw &= ~(X87_SW_TOP_MASK << X87_SW_TOP_SHIFT); // top = 0
11 m_x87_tw = 0; // tag word = 0
12 }
13
READMMX(uint32_t ea,MMX_REG & r)14 void i386_device::READMMX(uint32_t ea,MMX_REG &r)
15 {
16 r.q=READ64(ea);
17 }
18
WRITEMMX(uint32_t ea,MMX_REG & r)19 void i386_device::WRITEMMX(uint32_t ea,MMX_REG &r)
20 {
21 WRITE64(ea, r.q);
22 }
23
READXMM(uint32_t ea,XMM_REG & r)24 void i386_device::READXMM(uint32_t ea,XMM_REG &r)
25 {
26 r.q[0]=READ64(ea);
27 r.q[1]=READ64(ea+8);
28 }
29
WRITEXMM(uint32_t ea,i386_device::XMM_REG & r)30 void i386_device::WRITEXMM(uint32_t ea,i386_device::XMM_REG &r)
31 {
32 WRITE64(ea, r.q[0]);
33 WRITE64(ea+8, r.q[1]);
34 }
35
READXMM_LO64(uint32_t ea,i386_device::XMM_REG & r)36 void i386_device::READXMM_LO64(uint32_t ea,i386_device::XMM_REG &r)
37 {
38 r.q[0]=READ64(ea);
39 }
40
WRITEXMM_LO64(uint32_t ea,i386_device::XMM_REG & r)41 void i386_device::WRITEXMM_LO64(uint32_t ea,i386_device::XMM_REG &r)
42 {
43 WRITE64(ea, r.q[0]);
44 }
45
READXMM_HI64(uint32_t ea,i386_device::XMM_REG & r)46 void i386_device::READXMM_HI64(uint32_t ea,i386_device::XMM_REG &r)
47 {
48 r.q[1]=READ64(ea);
49 }
50
WRITEXMM_HI64(uint32_t ea,i386_device::XMM_REG & r)51 void i386_device::WRITEXMM_HI64(uint32_t ea,i386_device::XMM_REG &r)
52 {
53 WRITE64(ea, r.q[1]);
54 }
55
pentium_rdmsr()56 void i386_device::pentium_rdmsr() // Opcode 0x0f 32
57 {
58 uint64_t data;
59 bool valid_msr = false;
60
61 // call the model specific implementation
62 data = opcode_rdmsr(valid_msr);
63 if (m_CPL != 0 || valid_msr == false) // if current privilege level isn't 0 or the register isn't recognized ...
64 FAULT(FAULT_GP, 0) // ... throw a general exception fault
65 else
66 {
67 REG32(EDX) = data >> 32;
68 REG32(EAX) = data & 0xffffffff;
69 }
70
71 CYCLES(CYCLES_RDMSR);
72 }
73
pentium_wrmsr()74 void i386_device::pentium_wrmsr() // Opcode 0x0f 30
75 {
76 uint64_t data;
77 bool valid_msr = false;
78
79 data = (uint64_t)REG32(EAX);
80 data |= (uint64_t)(REG32(EDX)) << 32;
81
82 // call the model specific implementation
83 opcode_wrmsr(data, valid_msr);
84
85 if(m_CPL != 0 || valid_msr == 0) // if current privilege level isn't 0 or the register isn't recognized
86 FAULT(FAULT_GP,0) // ... throw a general exception fault
87
88 CYCLES(1); // TODO: correct cycle count (~30-45)
89 }
90
pentium_rdtsc()91 void i386_device::pentium_rdtsc() // Opcode 0x0f 31
92 {
93 uint64_t ts = m_tsc + (m_base_cycles - m_cycles);
94 REG32(EAX) = (uint32_t)(ts);
95 REG32(EDX) = (uint32_t)(ts >> 32);
96
97 CYCLES(CYCLES_RDTSC);
98 }
99
pentium_ud2()100 void i386_device::pentium_ud2() // Opcode 0x0f 0b
101 {
102 i386_trap(6, 0, 0);
103 }
104
pentium_rsm()105 void i386_device::pentium_rsm()
106 {
107 if(!m_smm)
108 {
109 logerror("i386: Invalid RSM outside SMM at %08X\n", m_pc - 1);
110 i386_trap(6, 0, 0);
111 return;
112 }
113
114 leave_smm();
115 if(m_smi_latched)
116 {
117 enter_smm();
118 return;
119 }
120 if(m_nmi_latched)
121 {
122 m_nmi_latched = false;
123 i386_trap(2, 1, 0);
124 }
125 }
126
pentium_prefetch_m8()127 void i386_device::pentium_prefetch_m8() // Opcode 0x0f 18
128 {
129 uint8_t modrm = FETCH();
130 uint32_t ea = GetEA(modrm,0);
131 // TODO: manage the cache if present
132 CYCLES(1+(ea & 1)); // TODO: correct cycle count
133 }
134
pentium_cmovo_r16_rm16()135 void i386_device::pentium_cmovo_r16_rm16() // Opcode 0x0f 40
136 {
137 uint16_t src;
138 uint8_t modrm = FETCH();
139
140 if( modrm >= 0xc0 )
141 {
142 if (m_OF == 1)
143 {
144 src = LOAD_RM16(modrm);
145 STORE_REG16(modrm, src);
146 }
147 CYCLES(1); // TODO: correct cycle count
148 }
149 else
150 {
151 uint32_t ea = GetEA(modrm,0);
152 if (m_OF == 1)
153 {
154 src = READ16(ea);
155 STORE_REG16(modrm, src);
156 }
157 CYCLES(1); // TODO: correct cycle count
158 }
159 }
160
pentium_cmovo_r32_rm32()161 void i386_device::pentium_cmovo_r32_rm32() // Opcode 0x0f 40
162 {
163 uint32_t src;
164 uint8_t modrm = FETCH();
165
166 if( modrm >= 0xc0 )
167 {
168 if (m_OF == 1)
169 {
170 src = LOAD_RM32(modrm);
171 STORE_REG32(modrm, src);
172 }
173 CYCLES(1); // TODO: correct cycle count
174 }
175 else
176 {
177 uint32_t ea = GetEA(modrm,0);
178 if (m_OF == 1)
179 {
180 src = READ32(ea);
181 STORE_REG32(modrm, src);
182 }
183 CYCLES(1); // TODO: correct cycle count
184 }
185 }
186
pentium_cmovno_r16_rm16()187 void i386_device::pentium_cmovno_r16_rm16() // Opcode 0x0f 41
188 {
189 uint16_t src;
190 uint8_t modrm = FETCH();
191
192 if( modrm >= 0xc0 )
193 {
194 if (m_OF == 0)
195 {
196 src = LOAD_RM16(modrm);
197 STORE_REG16(modrm, src);
198 }
199 CYCLES(1); // TODO: correct cycle count
200 }
201 else
202 {
203 uint32_t ea = GetEA(modrm,0);
204 if (m_OF == 0)
205 {
206 src = READ16(ea);
207 STORE_REG16(modrm, src);
208 }
209 CYCLES(1); // TODO: correct cycle count
210 }
211 }
212
pentium_cmovno_r32_rm32()213 void i386_device::pentium_cmovno_r32_rm32() // Opcode 0x0f 41
214 {
215 uint32_t src;
216 uint8_t modrm = FETCH();
217
218 if( modrm >= 0xc0 )
219 {
220 if (m_OF == 0)
221 {
222 src = LOAD_RM32(modrm);
223 STORE_REG32(modrm, src);
224 }
225 CYCLES(1); // TODO: correct cycle count
226 }
227 else
228 {
229 uint32_t ea = GetEA(modrm,0);
230 if (m_OF == 0)
231 {
232 src = READ32(ea);
233 STORE_REG32(modrm, src);
234 }
235 CYCLES(1); // TODO: correct cycle count
236 }
237 }
238
pentium_cmovb_r16_rm16()239 void i386_device::pentium_cmovb_r16_rm16() // Opcode 0x0f 42
240 {
241 uint16_t src;
242 uint8_t modrm = FETCH();
243
244 if( modrm >= 0xc0 )
245 {
246 if (m_CF == 1)
247 {
248 src = LOAD_RM16(modrm);
249 STORE_REG16(modrm, src);
250 }
251 CYCLES(1); // TODO: correct cycle count
252 }
253 else
254 {
255 uint32_t ea = GetEA(modrm,0);
256 if (m_CF == 1)
257 {
258 src = READ16(ea);
259 STORE_REG16(modrm, src);
260 }
261 CYCLES(1); // TODO: correct cycle count
262 }
263 }
264
pentium_cmovb_r32_rm32()265 void i386_device::pentium_cmovb_r32_rm32() // Opcode 0x0f 42
266 {
267 uint32_t src;
268 uint8_t modrm = FETCH();
269
270 if( modrm >= 0xc0 )
271 {
272 if (m_CF == 1)
273 {
274 src = LOAD_RM32(modrm);
275 STORE_REG32(modrm, src);
276 }
277 CYCLES(1); // TODO: correct cycle count
278 }
279 else
280 {
281 uint32_t ea = GetEA(modrm,0);
282 if (m_CF == 1)
283 {
284 src = READ32(ea);
285 STORE_REG32(modrm, src);
286 }
287 CYCLES(1); // TODO: correct cycle count
288 }
289 }
290
pentium_cmovae_r16_rm16()291 void i386_device::pentium_cmovae_r16_rm16() // Opcode 0x0f 43
292 {
293 uint16_t src;
294 uint8_t modrm = FETCH();
295
296 if( modrm >= 0xc0 )
297 {
298 if (m_CF == 0)
299 {
300 src = LOAD_RM16(modrm);
301 STORE_REG16(modrm, src);
302 }
303 CYCLES(1); // TODO: correct cycle count
304 }
305 else
306 {
307 uint32_t ea = GetEA(modrm,0);
308 if (m_CF == 0)
309 {
310 src = READ16(ea);
311 STORE_REG16(modrm, src);
312 }
313 CYCLES(1); // TODO: correct cycle count
314 }
315 }
316
pentium_cmovae_r32_rm32()317 void i386_device::pentium_cmovae_r32_rm32() // Opcode 0x0f 43
318 {
319 uint32_t src;
320 uint8_t modrm = FETCH();
321
322 if( modrm >= 0xc0 )
323 {
324 if (m_CF == 0)
325 {
326 src = LOAD_RM32(modrm);
327 STORE_REG32(modrm, src);
328 }
329 CYCLES(1); // TODO: correct cycle count
330 }
331 else
332 {
333 uint32_t ea = GetEA(modrm,0);
334 if (m_CF == 0)
335 {
336 src = READ32(ea);
337 STORE_REG32(modrm, src);
338 }
339 CYCLES(1); // TODO: correct cycle count
340 }
341 }
342
pentium_cmove_r16_rm16()343 void i386_device::pentium_cmove_r16_rm16() // Opcode 0x0f 44
344 {
345 uint16_t src;
346 uint8_t modrm = FETCH();
347
348 if( modrm >= 0xc0 )
349 {
350 if (m_ZF == 1)
351 {
352 src = LOAD_RM16(modrm);
353 STORE_REG16(modrm, src);
354 }
355 CYCLES(1); // TODO: correct cycle count
356 }
357 else
358 {
359 uint32_t ea = GetEA(modrm,0);
360 if (m_ZF == 1)
361 {
362 src = READ16(ea);
363 STORE_REG16(modrm, src);
364 }
365 CYCLES(1); // TODO: correct cycle count
366 }
367 }
368
pentium_cmove_r32_rm32()369 void i386_device::pentium_cmove_r32_rm32() // Opcode 0x0f 44
370 {
371 uint32_t src;
372 uint8_t modrm = FETCH();
373
374 if( modrm >= 0xc0 )
375 {
376 if (m_ZF == 1)
377 {
378 src = LOAD_RM32(modrm);
379 STORE_REG32(modrm, src);
380 }
381 CYCLES(1); // TODO: correct cycle count
382 }
383 else
384 {
385 uint32_t ea = GetEA(modrm,0);
386 if (m_ZF == 1)
387 {
388 src = READ32(ea);
389 STORE_REG32(modrm, src);
390 }
391 CYCLES(1); // TODO: correct cycle count
392 }
393 }
394
pentium_cmovne_r16_rm16()395 void i386_device::pentium_cmovne_r16_rm16() // Opcode 0x0f 45
396 {
397 uint16_t src;
398 uint8_t modrm = FETCH();
399
400 if( modrm >= 0xc0 )
401 {
402 if (m_ZF == 0)
403 {
404 src = LOAD_RM16(modrm);
405 STORE_REG16(modrm, src);
406 }
407 CYCLES(1); // TODO: correct cycle count
408 }
409 else
410 {
411 uint32_t ea = GetEA(modrm,0);
412 if (m_ZF == 0)
413 {
414 src = READ16(ea);
415 STORE_REG16(modrm, src);
416 }
417 CYCLES(1); // TODO: correct cycle count
418 }
419 }
420
pentium_cmovne_r32_rm32()421 void i386_device::pentium_cmovne_r32_rm32() // Opcode 0x0f 45
422 {
423 uint32_t src;
424 uint8_t modrm = FETCH();
425
426 if( modrm >= 0xc0 )
427 {
428 if (m_ZF == 0)
429 {
430 src = LOAD_RM32(modrm);
431 STORE_REG32(modrm, src);
432 }
433 CYCLES(1); // TODO: correct cycle count
434 }
435 else
436 {
437 uint32_t ea = GetEA(modrm,0);
438 if (m_ZF == 0)
439 {
440 src = READ32(ea);
441 STORE_REG32(modrm, src);
442 }
443 CYCLES(1); // TODO: correct cycle count
444 }
445 }
446
pentium_cmovbe_r16_rm16()447 void i386_device::pentium_cmovbe_r16_rm16() // Opcode 0x0f 46
448 {
449 uint16_t src;
450 uint8_t modrm = FETCH();
451
452 if( modrm >= 0xc0 )
453 {
454 if ((m_CF == 1) || (m_ZF == 1))
455 {
456 src = LOAD_RM16(modrm);
457 STORE_REG16(modrm, src);
458 }
459 CYCLES(1); // TODO: correct cycle count
460 }
461 else
462 {
463 uint32_t ea = GetEA(modrm,0);
464 if ((m_CF == 1) || (m_ZF == 1))
465 {
466 src = READ16(ea);
467 STORE_REG16(modrm, src);
468 }
469 CYCLES(1); // TODO: correct cycle count
470 }
471 }
472
pentium_cmovbe_r32_rm32()473 void i386_device::pentium_cmovbe_r32_rm32() // Opcode 0x0f 46
474 {
475 uint32_t src;
476 uint8_t modrm = FETCH();
477
478 if( modrm >= 0xc0 )
479 {
480 if ((m_CF == 1) || (m_ZF == 1))
481 {
482 src = LOAD_RM32(modrm);
483 STORE_REG32(modrm, src);
484 }
485 CYCLES(1); // TODO: correct cycle count
486 }
487 else
488 {
489 uint32_t ea = GetEA(modrm,0);
490 if ((m_CF == 1) || (m_ZF == 1))
491 {
492 src = READ32(ea);
493 STORE_REG32(modrm, src);
494 }
495 CYCLES(1); // TODO: correct cycle count
496 }
497 }
498
pentium_cmova_r16_rm16()499 void i386_device::pentium_cmova_r16_rm16() // Opcode 0x0f 47
500 {
501 uint16_t src;
502 uint8_t modrm = FETCH();
503
504 if( modrm >= 0xc0 )
505 {
506 if ((m_CF == 0) && (m_ZF == 0))
507 {
508 src = LOAD_RM16(modrm);
509 STORE_REG16(modrm, src);
510 }
511 CYCLES(1); // TODO: correct cycle count
512 }
513 else
514 {
515 uint32_t ea = GetEA(modrm,0);
516 if ((m_CF == 0) && (m_ZF == 0))
517 {
518 src = READ16(ea);
519 STORE_REG16(modrm, src);
520 }
521 CYCLES(1); // TODO: correct cycle count
522 }
523 }
524
pentium_cmova_r32_rm32()525 void i386_device::pentium_cmova_r32_rm32() // Opcode 0x0f 47
526 {
527 uint32_t src;
528 uint8_t modrm = FETCH();
529
530 if( modrm >= 0xc0 )
531 {
532 if ((m_CF == 0) && (m_ZF == 0))
533 {
534 src = LOAD_RM32(modrm);
535 STORE_REG32(modrm, src);
536 }
537 CYCLES(1); // TODO: correct cycle count
538 }
539 else
540 {
541 uint32_t ea = GetEA(modrm,0);
542 if ((m_CF == 0) && (m_ZF == 0))
543 {
544 src = READ32(ea);
545 STORE_REG32(modrm, src);
546 }
547 CYCLES(1); // TODO: correct cycle count
548 }
549 }
550
pentium_cmovs_r16_rm16()551 void i386_device::pentium_cmovs_r16_rm16() // Opcode 0x0f 48
552 {
553 uint16_t src;
554 uint8_t modrm = FETCH();
555
556 if( modrm >= 0xc0 )
557 {
558 if (m_SF == 1)
559 {
560 src = LOAD_RM16(modrm);
561 STORE_REG16(modrm, src);
562 }
563 CYCLES(1); // TODO: correct cycle count
564 }
565 else
566 {
567 uint32_t ea = GetEA(modrm,0);
568 if (m_SF == 1)
569 {
570 src = READ16(ea);
571 STORE_REG16(modrm, src);
572 }
573 CYCLES(1); // TODO: correct cycle count
574 }
575 }
576
pentium_cmovs_r32_rm32()577 void i386_device::pentium_cmovs_r32_rm32() // Opcode 0x0f 48
578 {
579 uint32_t src;
580 uint8_t modrm = FETCH();
581
582 if( modrm >= 0xc0 )
583 {
584 if (m_SF == 1)
585 {
586 src = LOAD_RM32(modrm);
587 STORE_REG32(modrm, src);
588 }
589 CYCLES(1); // TODO: correct cycle count
590 }
591 else
592 {
593 uint32_t ea = GetEA(modrm,0);
594 if (m_SF == 1)
595 {
596 src = READ32(ea);
597 STORE_REG32(modrm, src);
598 }
599 CYCLES(1); // TODO: correct cycle count
600 }
601 }
602
pentium_cmovns_r16_rm16()603 void i386_device::pentium_cmovns_r16_rm16() // Opcode 0x0f 49
604 {
605 uint16_t src;
606 uint8_t modrm = FETCH();
607
608 if( modrm >= 0xc0 )
609 {
610 if (m_SF == 0)
611 {
612 src = LOAD_RM16(modrm);
613 STORE_REG16(modrm, src);
614 }
615 CYCLES(1); // TODO: correct cycle count
616 }
617 else
618 {
619 uint32_t ea = GetEA(modrm,0);
620 if (m_SF == 0)
621 {
622 src = READ16(ea);
623 STORE_REG16(modrm, src);
624 }
625 CYCLES(1); // TODO: correct cycle count
626 }
627 }
628
pentium_cmovns_r32_rm32()629 void i386_device::pentium_cmovns_r32_rm32() // Opcode 0x0f 49
630 {
631 uint32_t src;
632 uint8_t modrm = FETCH();
633
634 if( modrm >= 0xc0 )
635 {
636 if (m_SF == 0)
637 {
638 src = LOAD_RM32(modrm);
639 STORE_REG32(modrm, src);
640 }
641 CYCLES(1); // TODO: correct cycle count
642 }
643 else
644 {
645 uint32_t ea = GetEA(modrm,0);
646 if (m_SF == 0)
647 {
648 src = READ32(ea);
649 STORE_REG32(modrm, src);
650 }
651 CYCLES(1); // TODO: correct cycle count
652 }
653 }
654
pentium_cmovp_r16_rm16()655 void i386_device::pentium_cmovp_r16_rm16() // Opcode 0x0f 4a
656 {
657 uint16_t src;
658 uint8_t modrm = FETCH();
659
660 if( modrm >= 0xc0 )
661 {
662 if (m_PF == 1)
663 {
664 src = LOAD_RM16(modrm);
665 STORE_REG16(modrm, src);
666 }
667 CYCLES(1); // TODO: correct cycle count
668 }
669 else
670 {
671 uint32_t ea = GetEA(modrm,0);
672 if (m_PF == 1)
673 {
674 src = READ16(ea);
675 STORE_REG16(modrm, src);
676 }
677 CYCLES(1); // TODO: correct cycle count
678 }
679 }
680
pentium_cmovp_r32_rm32()681 void i386_device::pentium_cmovp_r32_rm32() // Opcode 0x0f 4a
682 {
683 uint32_t src;
684 uint8_t modrm = FETCH();
685
686 if( modrm >= 0xc0 )
687 {
688 if (m_PF == 1)
689 {
690 src = LOAD_RM32(modrm);
691 STORE_REG32(modrm, src);
692 }
693 CYCLES(1); // TODO: correct cycle count
694 }
695 else
696 {
697 uint32_t ea = GetEA(modrm,0);
698 if (m_PF == 1)
699 {
700 src = READ32(ea);
701 STORE_REG32(modrm, src);
702 }
703 CYCLES(1); // TODO: correct cycle count
704 }
705 }
706
pentium_cmovnp_r16_rm16()707 void i386_device::pentium_cmovnp_r16_rm16() // Opcode 0x0f 4b
708 {
709 uint16_t src;
710 uint8_t modrm = FETCH();
711
712 if( modrm >= 0xc0 )
713 {
714 if (m_PF == 0)
715 {
716 src = LOAD_RM16(modrm);
717 STORE_REG16(modrm, src);
718 }
719 CYCLES(1); // TODO: correct cycle count
720 }
721 else
722 {
723 uint32_t ea = GetEA(modrm,0);
724 if (m_PF == 0)
725 {
726 src = READ16(ea);
727 STORE_REG16(modrm, src);
728 }
729 CYCLES(1); // TODO: correct cycle count
730 }
731 }
732
pentium_cmovnp_r32_rm32()733 void i386_device::pentium_cmovnp_r32_rm32() // Opcode 0x0f 4b
734 {
735 uint32_t src;
736 uint8_t modrm = FETCH();
737
738 if( modrm >= 0xc0 )
739 {
740 if (m_PF == 0)
741 {
742 src = LOAD_RM32(modrm);
743 STORE_REG32(modrm, src);
744 }
745 CYCLES(1); // TODO: correct cycle count
746 }
747 else
748 {
749 uint32_t ea = GetEA(modrm,0);
750 if (m_PF == 0)
751 {
752 src = READ32(ea);
753 STORE_REG32(modrm, src);
754 }
755 CYCLES(1); // TODO: correct cycle count
756 }
757 }
758
pentium_cmovl_r16_rm16()759 void i386_device::pentium_cmovl_r16_rm16() // Opcode 0x0f 4c
760 {
761 uint16_t src;
762 uint8_t modrm = FETCH();
763
764 if( modrm >= 0xc0 )
765 {
766 if (m_SF != m_OF)
767 {
768 src = LOAD_RM16(modrm);
769 STORE_REG16(modrm, src);
770 }
771 CYCLES(1); // TODO: correct cycle count
772 }
773 else
774 {
775 uint32_t ea = GetEA(modrm,0);
776 if (m_SF != m_OF)
777 {
778 src = READ16(ea);
779 STORE_REG16(modrm, src);
780 }
781 CYCLES(1); // TODO: correct cycle count
782 }
783 }
784
pentium_cmovl_r32_rm32()785 void i386_device::pentium_cmovl_r32_rm32() // Opcode 0x0f 4c
786 {
787 uint32_t src;
788 uint8_t modrm = FETCH();
789
790 if( modrm >= 0xc0 )
791 {
792 if (m_SF != m_OF)
793 {
794 src = LOAD_RM32(modrm);
795 STORE_REG32(modrm, src);
796 }
797 CYCLES(1); // TODO: correct cycle count
798 }
799 else
800 {
801 uint32_t ea = GetEA(modrm,0);
802 if (m_SF != m_OF)
803 {
804 src = READ32(ea);
805 STORE_REG32(modrm, src);
806 }
807 CYCLES(1); // TODO: correct cycle count
808 }
809 }
810
pentium_cmovge_r16_rm16()811 void i386_device::pentium_cmovge_r16_rm16() // Opcode 0x0f 4d
812 {
813 uint16_t src;
814 uint8_t modrm = FETCH();
815
816 if( modrm >= 0xc0 )
817 {
818 if (m_SF == m_OF)
819 {
820 src = LOAD_RM16(modrm);
821 STORE_REG16(modrm, src);
822 }
823 CYCLES(1); // TODO: correct cycle count
824 }
825 else
826 {
827 uint32_t ea = GetEA(modrm,0);
828 if (m_SF == m_OF)
829 {
830 src = READ16(ea);
831 STORE_REG16(modrm, src);
832 }
833 CYCLES(1); // TODO: correct cycle count
834 }
835 }
836
pentium_cmovge_r32_rm32()837 void i386_device::pentium_cmovge_r32_rm32() // Opcode 0x0f 4d
838 {
839 uint32_t src;
840 uint8_t modrm = FETCH();
841
842 if( modrm >= 0xc0 )
843 {
844 if (m_SF == m_OF)
845 {
846 src = LOAD_RM32(modrm);
847 STORE_REG32(modrm, src);
848 }
849 CYCLES(1); // TODO: correct cycle count
850 }
851 else
852 {
853 uint32_t ea = GetEA(modrm,0);
854 if (m_SF == m_OF)
855 {
856 src = READ32(ea);
857 STORE_REG32(modrm, src);
858 }
859 CYCLES(1); // TODO: correct cycle count
860 }
861 }
862
pentium_cmovle_r16_rm16()863 void i386_device::pentium_cmovle_r16_rm16() // Opcode 0x0f 4e
864 {
865 uint16_t src;
866 uint8_t modrm = FETCH();
867
868 if( modrm >= 0xc0 )
869 {
870 if ((m_ZF == 1) || (m_SF != m_OF))
871 {
872 src = LOAD_RM16(modrm);
873 STORE_REG16(modrm, src);
874 }
875 CYCLES(1); // TODO: correct cycle count
876 }
877 else
878 {
879 uint32_t ea = GetEA(modrm,0);
880 if ((m_ZF == 1) || (m_SF != m_OF))
881 {
882 src = READ16(ea);
883 STORE_REG16(modrm, src);
884 }
885 CYCLES(1); // TODO: correct cycle count
886 }
887 }
888
pentium_cmovle_r32_rm32()889 void i386_device::pentium_cmovle_r32_rm32() // Opcode 0x0f 4e
890 {
891 uint32_t src;
892 uint8_t modrm = FETCH();
893
894 if( modrm >= 0xc0 )
895 {
896 if ((m_ZF == 1) || (m_SF != m_OF))
897 {
898 src = LOAD_RM32(modrm);
899 STORE_REG32(modrm, src);
900 }
901 CYCLES(1); // TODO: correct cycle count
902 }
903 else
904 {
905 uint32_t ea = GetEA(modrm,0);
906 if ((m_ZF == 1) || (m_SF != m_OF))
907 {
908 src = READ32(ea);
909 STORE_REG32(modrm, src);
910 }
911 CYCLES(1); // TODO: correct cycle count
912 }
913 }
914
pentium_cmovg_r16_rm16()915 void i386_device::pentium_cmovg_r16_rm16() // Opcode 0x0f 4f
916 {
917 uint16_t src;
918 uint8_t modrm = FETCH();
919
920 if( modrm >= 0xc0 )
921 {
922 if ((m_ZF == 0) && (m_SF == m_OF))
923 {
924 src = LOAD_RM16(modrm);
925 STORE_REG16(modrm, src);
926 }
927 CYCLES(1); // TODO: correct cycle count
928 }
929 else
930 {
931 uint32_t ea = GetEA(modrm,0);
932 if ((m_ZF == 0) && (m_SF == m_OF))
933 {
934 src = READ16(ea);
935 STORE_REG16(modrm, src);
936 }
937 CYCLES(1); // TODO: correct cycle count
938 }
939 }
940
pentium_cmovg_r32_rm32()941 void i386_device::pentium_cmovg_r32_rm32() // Opcode 0x0f 4f
942 {
943 uint32_t src;
944 uint8_t modrm = FETCH();
945
946 if( modrm >= 0xc0 )
947 {
948 if ((m_ZF == 0) && (m_SF == m_OF))
949 {
950 src = LOAD_RM32(modrm);
951 STORE_REG32(modrm, src);
952 }
953 CYCLES(1); // TODO: correct cycle count
954 }
955 else
956 {
957 uint32_t ea = GetEA(modrm,0);
958 if ((m_ZF == 0) && (m_SF == m_OF))
959 {
960 src = READ32(ea);
961 STORE_REG32(modrm, src);
962 }
963 CYCLES(1); // TODO: correct cycle count
964 }
965 }
966
pentium_movnti_m16_r16()967 void i386_device::pentium_movnti_m16_r16() // Opcode 0f c3
968 {
969 uint8_t modrm = FETCH();
970 if( modrm >= 0xc0 ) {
971 // unsupported by cpu
972 CYCLES(1); // TODO: correct cycle count
973 } else {
974 // TODO: manage the cache if present
975 uint32_t ea = GetEA(modrm, 0);
976 WRITE16(ea,LOAD_RM16(modrm));
977 CYCLES(1); // TODO: correct cycle count
978 }
979 }
980
pentium_movnti_m32_r32()981 void i386_device::pentium_movnti_m32_r32() // Opcode 0f c3
982 {
983 uint8_t modrm = FETCH();
984 if( modrm >= 0xc0 ) {
985 // unsupported by cpu
986 CYCLES(1); // TODO: correct cycle count
987 } else {
988 // TODO: manage the cache if present
989 uint32_t ea = GetEA(modrm, 0);
990 WRITE32(ea,LOAD_RM32(modrm));
991 CYCLES(1); // TODO: correct cycle count
992 }
993 }
994
i386_cyrix_special()995 void i386_device::i386_cyrix_special() // Opcode 0x0f 3a-3d
996 {
997 /*
998 0f 3a BB0_RESET (set BB0 pointer = base)
999 0f 3b BB1_RESET (set BB1 pointer = base)
1000 0f 3c CPU_WRITE (write special CPU memory-mapped register, [ebx] = eax)
1001 0f 3d CPU_READ (read special CPU memory-mapped register, eax, = [ebx])
1002 */
1003
1004 CYCLES(1);
1005 }
1006
i386_cyrix_unknown()1007 void i386_device::i386_cyrix_unknown() // Opcode 0x0f 74
1008 {
1009 logerror("Unemulated 0x0f 0x74 opcode called\n");
1010
1011 CYCLES(1);
1012 }
1013
pentium_cmpxchg8b_m64()1014 void i386_device::pentium_cmpxchg8b_m64() // Opcode 0x0f c7
1015 {
1016 uint8_t modm = FETCH();
1017 if( modm >= 0xc0 ) {
1018 report_invalid_modrm("cmpxchg8b_m64", modm);
1019 } else {
1020 uint32_t ea = GetEA(modm, 0);
1021 uint64_t value = READ64(ea);
1022 uint64_t edx_eax = (((uint64_t) REG32(EDX)) << 32) | REG32(EAX);
1023 uint64_t ecx_ebx = (((uint64_t) REG32(ECX)) << 32) | REG32(EBX);
1024
1025 if( value == edx_eax ) {
1026 WRITE64(ea, ecx_ebx);
1027 m_ZF = 1;
1028 CYCLES(CYCLES_CMPXCHG_REG_MEM_T);
1029 } else {
1030 REG32(EDX) = (uint32_t) (value >> 32);
1031 REG32(EAX) = (uint32_t) (value >> 0);
1032 m_ZF = 0;
1033 CYCLES(CYCLES_CMPXCHG_REG_MEM_F);
1034 }
1035 }
1036 }
1037
pentium_movntq_m64_r64()1038 void i386_device::pentium_movntq_m64_r64() // Opcode 0f e7
1039 {
1040 //MMXPROLOG(); // TODO: check if needed
1041 uint8_t modrm = FETCH();
1042 if( modrm >= 0xc0 ) {
1043 CYCLES(1); // unsupported
1044 } else {
1045 // TODO: manage the cache if present
1046 uint32_t ea = GetEA(modrm, 0);
1047 WRITEMMX(ea, MMX((modrm >> 3) & 0x7));
1048 CYCLES(1); // TODO: correct cycle count
1049 }
1050 }
1051
pentium_maskmovq_r64_r64()1052 void i386_device::pentium_maskmovq_r64_r64() // Opcode 0f f7
1053 {
1054 int s,m,n;
1055 uint8_t modm = FETCH();
1056 uint32_t ea = GetEA(7, 0); // ds:di/edi/rdi register
1057 MMXPROLOG();
1058 s=(modm >> 3) & 7;
1059 m=modm & 7;
1060 for (n=0;n <= 7;n++)
1061 if (MMX(m).b[n] & 127)
1062 WRITE8(ea+n, MMX(s).b[n]);
1063 }
1064
sse_maskmovdqu_r128_r128()1065 void i386_device::sse_maskmovdqu_r128_r128() // Opcode 66 0f f7
1066 {
1067 int s,m,n;
1068 uint8_t modm = FETCH();
1069 uint32_t ea = GetEA(7, 0); // ds:di/edi/rdi register
1070 s=(modm >> 3) & 7;
1071 m=modm & 7;
1072 for (n=0;n < 16;n++)
1073 if (XMM(m).b[n] & 127)
1074 WRITE8(ea+n, XMM(s).b[n]);
1075 }
1076
pentium_popcnt_r16_rm16()1077 void i386_device::pentium_popcnt_r16_rm16() // Opcode f3 0f b8
1078 {
1079 uint16_t src;
1080 uint8_t modrm = FETCH();
1081 int n,count;
1082
1083 if( modrm >= 0xc0 ) {
1084 src = LOAD_RM16(modrm);
1085 } else {
1086 uint32_t ea = GetEA(modrm,0);
1087 src = READ16(ea);
1088 }
1089 count=0;
1090 for (n=0;n < 16;n++) {
1091 count=count+(src & 1);
1092 src=src >> 1;
1093 }
1094 STORE_REG16(modrm, count);
1095 CYCLES(1); // TODO: correct cycle count
1096 }
1097
pentium_popcnt_r32_rm32()1098 void i386_device::pentium_popcnt_r32_rm32() // Opcode f3 0f b8
1099 {
1100 uint32_t src;
1101 uint8_t modrm = FETCH();
1102 int n,count;
1103
1104 if( modrm >= 0xc0 ) {
1105 src = LOAD_RM32(modrm);
1106 } else {
1107 uint32_t ea = GetEA(modrm,0);
1108 src = READ32(ea);
1109 }
1110 count=0;
1111 for (n=0;n < 32;n++) {
1112 count=count+(src & 1);
1113 src=src >> 1;
1114 }
1115 STORE_REG32(modrm, count);
1116 CYCLES(1); // TODO: correct cycle count
1117 }
1118
pentium_tzcnt_r16_rm16()1119 void i386_device::pentium_tzcnt_r16_rm16()
1120 {
1121 // for CPUs that don't support TZCNT, fall back to BSF
1122 i386_bsf_r16_rm16();
1123 // TODO: actually implement TZCNT
1124 }
1125
pentium_tzcnt_r32_rm32()1126 void i386_device::pentium_tzcnt_r32_rm32()
1127 {
1128 // for CPUs that don't support TZCNT, fall back to BSF
1129 i386_bsf_r32_rm32();
1130 // TODO: actually implement TZCNT
1131 }
1132
// Converts a signed 16-bit value to a signed byte, clamping to [-128, 127].
static inline int8_t SaturatedSignedWordToSignedByte(int16_t word)
{
	const int clamped = (word > 127) ? 127 : ((word < -128) ? -128 : word);
	return (int8_t)clamped;
}
1141
// Converts a signed 16-bit value to an unsigned byte, clamping to [0, 255].
static inline uint8_t SaturatedSignedWordToUnsignedByte(int16_t word)
{
	const int clamped = (word > 255) ? 255 : ((word < 0) ? 0 : word);
	return (uint8_t)clamped;
}
1150
// Converts a signed 32-bit value to a signed 16-bit value, clamping to [-32768, 32767].
static inline int16_t SaturatedSignedDwordToSignedWord(int32_t dword)
{
	const int32_t clamped = (dword > 32767) ? 32767 : ((dword < -32768) ? -32768 : dword);
	return (int16_t)clamped;
}
1159
// Converts a signed 32-bit value to an unsigned 16-bit value, clamping to [0, 65535].
static inline uint16_t SaturatedSignedDwordToUnsignedWord(int32_t dword)
{
	const int32_t clamped = (dword > 65535) ? 65535 : ((dword < 0) ? 0 : dword);
	return (uint16_t)clamped;
}
1168
mmx_group_0f71()1169 void i386_device::mmx_group_0f71() // Opcode 0f 71
1170 {
1171 uint8_t modm = FETCH();
1172 uint8_t imm8 = FETCH();
1173 MMXPROLOG();
1174 if( modm >= 0xc0 ) {
1175 switch ( (modm & 0x38) >> 3 )
1176 {
1177 case 2: // psrlw
1178 MMX(modm & 7).w[0]=MMX(modm & 7).w[0] >> imm8;
1179 MMX(modm & 7).w[1]=MMX(modm & 7).w[1] >> imm8;
1180 MMX(modm & 7).w[2]=MMX(modm & 7).w[2] >> imm8;
1181 MMX(modm & 7).w[3]=MMX(modm & 7).w[3] >> imm8;
1182 break;
1183 case 4: // psraw
1184 MMX(modm & 7).s[0]=MMX(modm & 7).s[0] >> imm8;
1185 MMX(modm & 7).s[1]=MMX(modm & 7).s[1] >> imm8;
1186 MMX(modm & 7).s[2]=MMX(modm & 7).s[2] >> imm8;
1187 MMX(modm & 7).s[3]=MMX(modm & 7).s[3] >> imm8;
1188 break;
1189 case 6: // psllw
1190 MMX(modm & 7).w[0]=MMX(modm & 7).w[0] << imm8;
1191 MMX(modm & 7).w[1]=MMX(modm & 7).w[1] << imm8;
1192 MMX(modm & 7).w[2]=MMX(modm & 7).w[2] << imm8;
1193 MMX(modm & 7).w[3]=MMX(modm & 7).w[3] << imm8;
1194 break;
1195 default:
1196 report_invalid_modrm("mmx_group0f71", modm);
1197 }
1198 }
1199 }
1200
sse_group_660f71()1201 void i386_device::sse_group_660f71() // Opcode 66 0f 71
1202 {
1203 uint8_t modm = FETCH();
1204 uint8_t imm8 = FETCH();
1205 if (modm >= 0xc0) {
1206 switch ((modm & 0x38) >> 3)
1207 {
1208 case 2: // psrlw
1209 for (int n = 0; n < 8;n++)
1210 XMM(modm & 7).w[n] = XMM(modm & 7).w[n] >> imm8;
1211 break;
1212 case 4: // psraw
1213 for (int n = 0; n < 8;n++)
1214 XMM(modm & 7).s[n] = XMM(modm & 7).s[n] >> imm8;
1215 break;
1216 case 6: // psllw
1217 for (int n = 0; n < 8;n++)
1218 XMM(modm & 7).w[n] = XMM(modm & 7).w[n] << imm8;
1219 break;
1220 default:
1221 report_invalid_modrm("mmx_group660f71", modm);
1222 }
1223 }
1224 }
1225
mmx_group_0f72()1226 void i386_device::mmx_group_0f72() // Opcode 0f 72
1227 {
1228 uint8_t modm = FETCH();
1229 uint8_t imm8 = FETCH();
1230 MMXPROLOG();
1231 if( modm >= 0xc0 ) {
1232 switch ( (modm & 0x38) >> 3 )
1233 {
1234 case 2: // psrld
1235 MMX(modm & 7).d[0]=MMX(modm & 7).d[0] >> imm8;
1236 MMX(modm & 7).d[1]=MMX(modm & 7).d[1] >> imm8;
1237 break;
1238 case 4: // psrad
1239 MMX(modm & 7).i[0]=MMX(modm & 7).i[0] >> imm8;
1240 MMX(modm & 7).i[1]=MMX(modm & 7).i[1] >> imm8;
1241 break;
1242 case 6: // pslld
1243 MMX(modm & 7).d[0]=MMX(modm & 7).d[0] << imm8;
1244 MMX(modm & 7).d[1]=MMX(modm & 7).d[1] << imm8;
1245 break;
1246 default:
1247 report_invalid_modrm("mmx_group0f72", modm);
1248 }
1249 }
1250 }
1251
sse_group_660f72()1252 void i386_device::sse_group_660f72() // Opcode 66 0f 72
1253 {
1254 uint8_t modm = FETCH();
1255 uint8_t imm8 = FETCH();
1256 if (modm >= 0xc0) {
1257 switch ((modm & 0x38) >> 3)
1258 {
1259 case 2: // psrld
1260 for (int n = 0; n < 4;n++)
1261 XMM(modm & 7).d[n] = XMM(modm & 7).d[n] >> imm8;
1262 break;
1263 case 4: // psrad
1264 for (int n = 0; n < 4;n++)
1265 XMM(modm & 7).i[n] = XMM(modm & 7).i[n] >> imm8;
1266 break;
1267 case 6: // pslld
1268 for (int n = 0; n < 4;n++)
1269 XMM(modm & 7).d[n] = XMM(modm & 7).d[n] << imm8;
1270 break;
1271 default:
1272 report_invalid_modrm("mmx_group660f72", modm);
1273 }
1274 }
1275 }
1276
mmx_group_0f73()1277 void i386_device::mmx_group_0f73() // Opcode 0f 73
1278 {
1279 uint8_t modm = FETCH();
1280 uint8_t imm8 = FETCH();
1281 MMXPROLOG();
1282 if( modm >= 0xc0 ) {
1283 switch ( (modm & 0x38) >> 3 )
1284 {
1285 case 2: // psrlq
1286 MMX(modm & 7).q = imm8 > 63 ? 0 : MMX(modm & 7).q >> imm8;
1287 break;
1288 case 6: // psllq
1289 MMX(modm & 7).q = imm8 > 63 ? 0 : MMX(modm & 7).q << imm8;
1290 break;
1291 default:
1292 report_invalid_modrm("mmx_group0f73", modm);
1293 }
1294 }
1295 }
1296
sse_group_660f73()1297 void i386_device::sse_group_660f73() // Opcode 66 0f 73
1298 {
1299 uint64_t t0;
1300 uint8_t modm = FETCH();
1301 uint8_t imm8 = FETCH();
1302 if (modm >= 0xc0) {
1303 switch ((modm & 0x38) >> 3)
1304 {
1305 case 2: // psrlq
1306 XMM(modm & 7).q[0] = imm8 > 63 ? 0 : XMM(modm & 7).q[0] >> imm8;
1307 XMM(modm & 7).q[1] = imm8 > 63 ? 0 : XMM(modm & 7).q[1] >> imm8;
1308 break;
1309 case 3: // psrldq
1310 if (imm8 >= 16)
1311 {
1312 XMM(modm & 7).q[0] = 0;
1313 XMM(modm & 7).q[1] = 0;
1314 }
1315 else if (imm8 >= 8)
1316 {
1317 imm8 = (imm8 & 7) << 3;
1318 XMM(modm & 7).q[0] = XMM(modm & 7).q[1] >> imm8;
1319 XMM(modm & 7).q[1] = 0;
1320 }
1321 else if (imm8)
1322 {
1323 t0 = XMM(modm & 7).q[0];
1324 imm8 = imm8 << 3;
1325 XMM(modm & 7).q[0] = (XMM(modm & 7).q[1] << (64 - imm8)) | (t0 >> imm8);
1326 XMM(modm & 7).q[1] = t0 >> imm8;
1327 }
1328 break;
1329 case 6: // psllq
1330 XMM(modm & 7).q[0] = imm8 > 63 ? 0 : XMM(modm & 7).q[0] << imm8;
1331 XMM(modm & 7).q[1] = imm8 > 63 ? 0 : XMM(modm & 7).q[1] << imm8;
1332 break;
1333 case 7: // pslldq
1334 if (imm8 >= 16)
1335 {
1336 XMM(modm & 7).q[0] = 0;
1337 XMM(modm & 7).q[1] = 0;
1338 }
1339 else if (imm8 >= 8)
1340 {
1341 imm8 = (imm8 & 7) << 3;
1342 XMM(modm & 7).q[1] = XMM(modm & 7).q[0] << imm8;
1343 XMM(modm & 7).q[0] = 0;
1344 }
1345 else if (imm8)
1346 {
1347 imm8 = imm8 << 3;
1348 XMM(modm & 7).q[1] = (XMM(modm & 7).q[0] >> (64 - imm8)) | (XMM(modm & 7).q[1] << imm8);
1349 XMM(modm & 7).q[0] = XMM(modm & 7).q[0] << imm8;
1350 }
1351 break;
1352 default:
1353 report_invalid_modrm("sse_group660f73", modm);
1354 }
1355 }
1356 }
1357
mmx_psrlw_r64_rm64()1358 void i386_device::mmx_psrlw_r64_rm64() // Opcode 0f d1
1359 {
1360 MMXPROLOG();
1361 uint8_t modrm = FETCH();
1362 if( modrm >= 0xc0 ) {
1363 int count=(int)MMX(modrm & 7).q;
1364 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] >> count;
1365 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] >> count;
1366 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] >> count;
1367 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] >> count;
1368 } else {
1369 MMX_REG src;
1370 uint32_t ea = GetEA(modrm, 0);
1371 READMMX(ea, src);
1372 int count=(int)src.q;
1373 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] >> count;
1374 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] >> count;
1375 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] >> count;
1376 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] >> count;
1377 }
1378 CYCLES(1); // TODO: correct cycle count
1379 }
1380
mmx_psrld_r64_rm64()1381 void i386_device::mmx_psrld_r64_rm64() // Opcode 0f d2
1382 {
1383 MMXPROLOG();
1384 uint8_t modrm = FETCH();
1385 if( modrm >= 0xc0 ) {
1386 int count=(int)MMX(modrm & 7).q;
1387 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] >> count;
1388 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] >> count;
1389 } else {
1390 MMX_REG src;
1391 uint32_t ea = GetEA(modrm, 0);
1392 READMMX(ea, src);
1393 int count=(int)src.q;
1394 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] >> count;
1395 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] >> count;
1396 }
1397 CYCLES(1); // TODO: correct cycle count
1398 }
1399
mmx_psrlq_r64_rm64()1400 void i386_device::mmx_psrlq_r64_rm64() // Opcode 0f d3
1401 {
1402 MMXPROLOG();
1403 uint8_t modrm = FETCH();
1404 if( modrm >= 0xc0 ) {
1405 int count=(int)MMX(modrm & 7).q;
1406 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q >> count;
1407 } else {
1408 MMX_REG src;
1409 uint32_t ea = GetEA(modrm, 0);
1410 READMMX(ea, src);
1411 int count=(int)src.q;
1412 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q >> count;
1413 }
1414 CYCLES(1); // TODO: correct cycle count
1415 }
1416
mmx_paddq_r64_rm64()1417 void i386_device::mmx_paddq_r64_rm64() // Opcode 0f d4
1418 {
1419 MMXPROLOG();
1420 uint8_t modrm = FETCH();
1421 if( modrm >= 0xc0 ) {
1422 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+MMX(modrm & 7).q;
1423 } else {
1424 MMX_REG src;
1425 uint32_t ea = GetEA(modrm, 0);
1426 READMMX(ea, src);
1427 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q+src.q;
1428 }
1429 CYCLES(1); // TODO: correct cycle count
1430 }
1431
mmx_pmullw_r64_rm64()1432 void i386_device::mmx_pmullw_r64_rm64() // Opcode 0f d5
1433 {
1434 MMXPROLOG();
1435 uint8_t modrm = FETCH();
1436 if( modrm >= 0xc0 ) {
1437 MMX((modrm >> 3) & 0x7).w[0]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[0]*(int32_t)MMX(modrm & 7).s[0]) & 0xffff;
1438 MMX((modrm >> 3) & 0x7).w[1]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[1]*(int32_t)MMX(modrm & 7).s[1]) & 0xffff;
1439 MMX((modrm >> 3) & 0x7).w[2]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[2]*(int32_t)MMX(modrm & 7).s[2]) & 0xffff;
1440 MMX((modrm >> 3) & 0x7).w[3]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[3]*(int32_t)MMX(modrm & 7).s[3]) & 0xffff;
1441 } else {
1442 MMX_REG src;
1443 uint32_t ea = GetEA(modrm, 0);
1444 READMMX(ea, src);
1445 MMX((modrm >> 3) & 0x7).w[0]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[0]*(int32_t)src.s[0]) & 0xffff;
1446 MMX((modrm >> 3) & 0x7).w[1]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[1]*(int32_t)src.s[1]) & 0xffff;
1447 MMX((modrm >> 3) & 0x7).w[2]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[2]*(int32_t)src.s[2]) & 0xffff;
1448 MMX((modrm >> 3) & 0x7).w[3]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[3]*(int32_t)src.s[3]) & 0xffff;
1449 }
1450 CYCLES(1); // TODO: correct cycle count
1451 }
1452
mmx_psubusb_r64_rm64()1453 void i386_device::mmx_psubusb_r64_rm64() // Opcode 0f d8
1454 {
1455 int n;
1456 MMXPROLOG();
1457 uint8_t modrm = FETCH();
1458 if( modrm >= 0xc0 ) {
1459 for (n=0;n < 8;n++)
1460 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 7).b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-MMX(modrm & 7).b[n];
1461 } else {
1462 MMX_REG src;
1463 uint32_t ea = GetEA(modrm, 0);
1464 READMMX(ea, src);
1465 for (n=0;n < 8;n++)
1466 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] < src.b[n] ? 0 : MMX((modrm >> 3) & 0x7).b[n]-src.b[n];
1467 }
1468 CYCLES(1); // TODO: correct cycle count
1469 }
1470
mmx_psubusw_r64_rm64()1471 void i386_device::mmx_psubusw_r64_rm64() // Opcode 0f d9
1472 {
1473 int n;
1474 MMXPROLOG();
1475 uint8_t modrm = FETCH();
1476 if( modrm >= 0xc0 ) {
1477 for (n=0;n < 4;n++)
1478 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < MMX(modrm & 7).w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-MMX(modrm & 7).w[n];
1479 } else {
1480 MMX_REG src;
1481 uint32_t ea = GetEA(modrm, 0);
1482 READMMX(ea, src);
1483 for (n=0;n < 4;n++)
1484 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] < src.w[n] ? 0 : MMX((modrm >> 3) & 0x7).w[n]-src.w[n];
1485 }
1486 CYCLES(1); // TODO: correct cycle count
1487 }
1488
mmx_pand_r64_rm64()1489 void i386_device::mmx_pand_r64_rm64() // Opcode 0f db
1490 {
1491 MMXPROLOG();
1492 uint8_t modrm = FETCH();
1493 if( modrm >= 0xc0 ) {
1494 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & MMX(modrm & 7).q;
1495 } else {
1496 MMX_REG src;
1497 uint32_t ea = GetEA(modrm, 0);
1498 READMMX(ea, src);
1499 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q & src.q;
1500 }
1501 CYCLES(1); // TODO: correct cycle count
1502 }
1503
mmx_paddusb_r64_rm64()1504 void i386_device::mmx_paddusb_r64_rm64() // Opcode 0f dc
1505 {
1506 int n;
1507 MMXPROLOG();
1508 uint8_t modrm = FETCH();
1509 if( modrm >= 0xc0 ) {
1510 for (n=0;n < 8;n++)
1511 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-MMX(modrm & 7).b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+MMX(modrm & 7).b[n];
1512 } else {
1513 MMX_REG src;
1514 uint32_t ea = GetEA(modrm, 0);
1515 READMMX(ea, src);
1516 for (n=0;n < 8;n++)
1517 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] > (0xff-src.b[n]) ? 0xff : MMX((modrm >> 3) & 0x7).b[n]+src.b[n];
1518 }
1519 CYCLES(1); // TODO: correct cycle count
1520 }
1521
mmx_paddusw_r64_rm64()1522 void i386_device::mmx_paddusw_r64_rm64() // Opcode 0f dd
1523 {
1524 int n;
1525 MMXPROLOG();
1526 uint8_t modrm = FETCH();
1527 if( modrm >= 0xc0 ) {
1528 for (n=0;n < 4;n++)
1529 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-MMX(modrm & 7).w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+MMX(modrm & 7).w[n];
1530 } else {
1531 MMX_REG src;
1532 uint32_t ea = GetEA(modrm, 0);
1533 READMMX(ea, src);
1534 for (n=0;n < 4;n++)
1535 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] > (0xffff-src.w[n]) ? 0xffff : MMX((modrm >> 3) & 0x7).w[n]+src.w[n];
1536 }
1537 CYCLES(1); // TODO: correct cycle count
1538 }
1539
mmx_pandn_r64_rm64()1540 void i386_device::mmx_pandn_r64_rm64() // Opcode 0f df
1541 {
1542 MMXPROLOG();
1543 uint8_t modrm = FETCH();
1544 if( modrm >= 0xc0 ) {
1545 MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & MMX(modrm & 7).q;
1546 } else {
1547 MMX_REG src;
1548 uint32_t ea = GetEA(modrm, 0);
1549 READMMX(ea, src);
1550 MMX((modrm >> 3) & 0x7).q=(~MMX((modrm >> 3) & 0x7).q) & src.q;
1551 }
1552 CYCLES(1); // TODO: correct cycle count
1553 }
1554
mmx_psraw_r64_rm64()1555 void i386_device::mmx_psraw_r64_rm64() // Opcode 0f e1
1556 {
1557 MMXPROLOG();
1558 uint8_t modrm = FETCH();
1559 if( modrm >= 0xc0 ) {
1560 int count=(int)MMX(modrm & 7).q;
1561 MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count;
1562 MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count;
1563 MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count;
1564 MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count;
1565 } else {
1566 MMX_REG src;
1567 uint32_t ea = GetEA(modrm, 0);
1568 READMMX(ea, src);
1569 int count=(int)src.q;
1570 MMX((modrm >> 3) & 0x7).s[0]=MMX((modrm >> 3) & 0x7).s[0] >> count;
1571 MMX((modrm >> 3) & 0x7).s[1]=MMX((modrm >> 3) & 0x7).s[1] >> count;
1572 MMX((modrm >> 3) & 0x7).s[2]=MMX((modrm >> 3) & 0x7).s[2] >> count;
1573 MMX((modrm >> 3) & 0x7).s[3]=MMX((modrm >> 3) & 0x7).s[3] >> count;
1574 }
1575 CYCLES(1); // TODO: correct cycle count
1576 }
1577
mmx_psrad_r64_rm64()1578 void i386_device::mmx_psrad_r64_rm64() // Opcode 0f e2
1579 {
1580 MMXPROLOG();
1581 uint8_t modrm = FETCH();
1582 if( modrm >= 0xc0 ) {
1583 int count=(int)MMX(modrm & 7).q;
1584 MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count;
1585 MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count;
1586 } else {
1587 MMX_REG src;
1588 uint32_t ea = GetEA(modrm, 0);
1589 READMMX(ea, src);
1590 int count=(int)src.q;
1591 MMX((modrm >> 3) & 0x7).i[0]=MMX((modrm >> 3) & 0x7).i[0] >> count;
1592 MMX((modrm >> 3) & 0x7).i[1]=MMX((modrm >> 3) & 0x7).i[1] >> count;
1593 }
1594 CYCLES(1); // TODO: correct cycle count
1595 }
1596
mmx_pmulhw_r64_rm64()1597 void i386_device::mmx_pmulhw_r64_rm64() // Opcode 0f e5
1598 {
1599 MMXPROLOG();
1600 uint8_t modrm = FETCH();
1601 if( modrm >= 0xc0 ) {
1602 MMX((modrm >> 3) & 0x7).w[0]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[0]*(int32_t)MMX(modrm & 7).s[0]) >> 16;
1603 MMX((modrm >> 3) & 0x7).w[1]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[1]*(int32_t)MMX(modrm & 7).s[1]) >> 16;
1604 MMX((modrm >> 3) & 0x7).w[2]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[2]*(int32_t)MMX(modrm & 7).s[2]) >> 16;
1605 MMX((modrm >> 3) & 0x7).w[3]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[3]*(int32_t)MMX(modrm & 7).s[3]) >> 16;
1606 } else {
1607 MMX_REG src;
1608 uint32_t ea = GetEA(modrm, 0);
1609 READMMX(ea, src);
1610 MMX((modrm >> 3) & 0x7).w[0]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[0]*(int32_t)src.s[0]) >> 16;
1611 MMX((modrm >> 3) & 0x7).w[1]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[1]*(int32_t)src.s[1]) >> 16;
1612 MMX((modrm >> 3) & 0x7).w[2]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[2]*(int32_t)src.s[2]) >> 16;
1613 MMX((modrm >> 3) & 0x7).w[3]=(uint32_t)((int32_t)MMX((modrm >> 3) & 0x7).s[3]*(int32_t)src.s[3]) >> 16;
1614 }
1615 CYCLES(1); // TODO: correct cycle count
1616 }
1617
mmx_psubsb_r64_rm64()1618 void i386_device::mmx_psubsb_r64_rm64() // Opcode 0f e8
1619 {
1620 int n;
1621 MMXPROLOG();
1622 uint8_t modrm = FETCH();
1623 if( modrm >= 0xc0 ) {
1624 for (n=0;n < 8;n++)
1625 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((int16_t)MMX((modrm >> 3) & 0x7).c[n] - (int16_t)MMX(modrm & 7).c[n]);
1626 } else {
1627 MMX_REG s;
1628 uint32_t ea = GetEA(modrm, 0);
1629 READMMX(ea, s);
1630 for (n=0;n < 8;n++)
1631 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((int16_t)MMX((modrm >> 3) & 0x7).c[n] - (int16_t)s.c[n]);
1632 }
1633 CYCLES(1); // TODO: correct cycle count
1634 }
1635
mmx_psubsw_r64_rm64()1636 void i386_device::mmx_psubsw_r64_rm64() // Opcode 0f e9
1637 {
1638 int n;
1639 MMXPROLOG();
1640 uint8_t modrm = FETCH();
1641 if( modrm >= 0xc0 ) {
1642 for (n=0;n < 4;n++)
1643 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((int32_t)MMX((modrm >> 3) & 0x7).s[n] - (int32_t)MMX(modrm & 7).s[n]);
1644 } else {
1645 MMX_REG s;
1646 uint32_t ea = GetEA(modrm, 0);
1647 READMMX(ea, s);
1648 for (n=0;n < 4;n++)
1649 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((int32_t)MMX((modrm >> 3) & 0x7).s[n] - (int32_t)s.s[n]);
1650 }
1651 CYCLES(1); // TODO: correct cycle count
1652 }
1653
mmx_por_r64_rm64()1654 void i386_device::mmx_por_r64_rm64() // Opcode 0f eb
1655 {
1656 MMXPROLOG();
1657 uint8_t modrm = FETCH();
1658 if( modrm >= 0xc0 ) {
1659 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | MMX(modrm & 7).q;
1660 } else {
1661 MMX_REG s;
1662 uint32_t ea = GetEA(modrm, 0);
1663 READMMX(ea, s);
1664 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q | s.q;
1665 }
1666 CYCLES(1); // TODO: correct cycle count
1667 }
1668
mmx_paddsb_r64_rm64()1669 void i386_device::mmx_paddsb_r64_rm64() // Opcode 0f ec
1670 {
1671 int n;
1672 MMXPROLOG();
1673 uint8_t modrm = FETCH();
1674 if( modrm >= 0xc0 ) {
1675 for (n=0;n < 8;n++)
1676 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((int16_t)MMX((modrm >> 3) & 0x7).c[n] + (int16_t)MMX(modrm & 7).c[n]);
1677 } else {
1678 MMX_REG s;
1679 uint32_t ea = GetEA(modrm, 0);
1680 READMMX(ea, s);
1681 for (n=0;n < 8;n++)
1682 MMX((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((int16_t)MMX((modrm >> 3) & 0x7).c[n] + (int16_t)s.c[n]);
1683 }
1684 CYCLES(1); // TODO: correct cycle count
1685 }
1686
mmx_paddsw_r64_rm64()1687 void i386_device::mmx_paddsw_r64_rm64() // Opcode 0f ed
1688 {
1689 int n;
1690 MMXPROLOG();
1691 uint8_t modrm = FETCH();
1692 if( modrm >= 0xc0 ) {
1693 for (n=0;n < 4;n++)
1694 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((int32_t)MMX((modrm >> 3) & 0x7).s[n] + (int32_t)MMX(modrm & 7).s[n]);
1695 } else {
1696 MMX_REG s;
1697 uint32_t ea = GetEA(modrm, 0);
1698 READMMX(ea, s);
1699 for (n=0;n < 4;n++)
1700 MMX((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((int32_t)MMX((modrm >> 3) & 0x7).s[n] + (int32_t)s.s[n]);
1701 }
1702 CYCLES(1); // TODO: correct cycle count
1703 }
1704
mmx_pxor_r64_rm64()1705 void i386_device::mmx_pxor_r64_rm64() // Opcode 0f ef
1706 {
1707 MMXPROLOG();
1708 uint8_t modrm = FETCH();
1709 if( modrm >= 0xc0 ) {
1710 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ MMX(modrm & 7).q;
1711 } else {
1712 MMX_REG s;
1713 uint32_t ea = GetEA(modrm, 0);
1714 READMMX(ea, s);
1715 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q ^ s.q;
1716 }
1717 CYCLES(1); // TODO: correct cycle count
1718 }
1719
mmx_psllw_r64_rm64()1720 void i386_device::mmx_psllw_r64_rm64() // Opcode 0f f1
1721 {
1722 MMXPROLOG();
1723 uint8_t modrm = FETCH();
1724 if( modrm >= 0xc0 ) {
1725 int count=(int)MMX(modrm & 7).q;
1726 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] << count;
1727 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] << count;
1728 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] << count;
1729 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] << count;
1730 } else {
1731 MMX_REG s;
1732 uint32_t ea = GetEA(modrm, 0);
1733 READMMX(ea, s);
1734 int count=(int)s.q;
1735 MMX((modrm >> 3) & 0x7).w[0]=MMX((modrm >> 3) & 0x7).w[0] << count;
1736 MMX((modrm >> 3) & 0x7).w[1]=MMX((modrm >> 3) & 0x7).w[1] << count;
1737 MMX((modrm >> 3) & 0x7).w[2]=MMX((modrm >> 3) & 0x7).w[2] << count;
1738 MMX((modrm >> 3) & 0x7).w[3]=MMX((modrm >> 3) & 0x7).w[3] << count;
1739 }
1740 CYCLES(1); // TODO: correct cycle count
1741 }
1742
mmx_pslld_r64_rm64()1743 void i386_device::mmx_pslld_r64_rm64() // Opcode 0f f2
1744 {
1745 MMXPROLOG();
1746 uint8_t modrm = FETCH();
1747 if( modrm >= 0xc0 ) {
1748 int count=(int)MMX(modrm & 7).q;
1749 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] << count;
1750 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] << count;
1751 } else {
1752 MMX_REG s;
1753 uint32_t ea = GetEA(modrm, 0);
1754 READMMX(ea, s);
1755 int count=(int)s.q;
1756 MMX((modrm >> 3) & 0x7).d[0]=MMX((modrm >> 3) & 0x7).d[0] << count;
1757 MMX((modrm >> 3) & 0x7).d[1]=MMX((modrm >> 3) & 0x7).d[1] << count;
1758 }
1759 CYCLES(1); // TODO: correct cycle count
1760 }
1761
mmx_psllq_r64_rm64()1762 void i386_device::mmx_psllq_r64_rm64() // Opcode 0f f3
1763 {
1764 MMXPROLOG();
1765 uint8_t modrm = FETCH();
1766 if( modrm >= 0xc0 ) {
1767 int count=(int)MMX(modrm & 7).q;
1768 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q << count;
1769 } else {
1770 MMX_REG s;
1771 uint32_t ea = GetEA(modrm, 0);
1772 READMMX(ea, s);
1773 int count=(int)s.q;
1774 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q << count;
1775 }
1776 CYCLES(1); // TODO: correct cycle count
1777 }
1778
mmx_pmaddwd_r64_rm64()1779 void i386_device::mmx_pmaddwd_r64_rm64() // Opcode 0f f5
1780 {
1781 MMXPROLOG();
1782 uint8_t modrm = FETCH();
1783 if( modrm >= 0xc0 ) {
1784 MMX((modrm >> 3) & 0x7).i[0]=(int32_t)MMX((modrm >> 3) & 0x7).s[0]*(int32_t)MMX(modrm & 7).s[0]+
1785 (int32_t)MMX((modrm >> 3) & 0x7).s[1]*(int32_t)MMX(modrm & 7).s[1];
1786 MMX((modrm >> 3) & 0x7).i[1]=(int32_t)MMX((modrm >> 3) & 0x7).s[2]*(int32_t)MMX(modrm & 7).s[2]+
1787 (int32_t)MMX((modrm >> 3) & 0x7).s[3]*(int32_t)MMX(modrm & 7).s[3];
1788 } else {
1789 MMX_REG s;
1790 uint32_t ea = GetEA(modrm, 0);
1791 READMMX(ea, s);
1792 MMX((modrm >> 3) & 0x7).i[0]=(int32_t)MMX((modrm >> 3) & 0x7).s[0]*(int32_t)s.s[0]+
1793 (int32_t)MMX((modrm >> 3) & 0x7).s[1]*(int32_t)s.s[1];
1794 MMX((modrm >> 3) & 0x7).i[1]=(int32_t)MMX((modrm >> 3) & 0x7).s[2]*(int32_t)s.s[2]+
1795 (int32_t)MMX((modrm >> 3) & 0x7).s[3]*(int32_t)s.s[3];
1796 }
1797 CYCLES(1); // TODO: correct cycle count
1798 }
1799
mmx_psubb_r64_rm64()1800 void i386_device::mmx_psubb_r64_rm64() // Opcode 0f f8
1801 {
1802 int n;
1803 MMXPROLOG();
1804 uint8_t modrm = FETCH();
1805 if( modrm >= 0xc0 ) {
1806 for (n=0;n < 8;n++)
1807 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - MMX(modrm & 7).b[n];
1808 } else {
1809 MMX_REG s;
1810 uint32_t ea = GetEA(modrm, 0);
1811 READMMX(ea, s);
1812 for (n=0;n < 8;n++)
1813 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] - s.b[n];
1814 }
1815 CYCLES(1); // TODO: correct cycle count
1816 }
1817
mmx_psubw_r64_rm64()1818 void i386_device::mmx_psubw_r64_rm64() // Opcode 0f f9
1819 {
1820 int n;
1821 MMXPROLOG();
1822 uint8_t modrm = FETCH();
1823 if( modrm >= 0xc0 ) {
1824 for (n=0;n < 4;n++)
1825 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - MMX(modrm & 7).w[n];
1826 } else {
1827 MMX_REG s;
1828 uint32_t ea = GetEA(modrm, 0);
1829 READMMX(ea, s);
1830 for (n=0;n < 4;n++)
1831 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] - s.w[n];
1832 }
1833 CYCLES(1); // TODO: correct cycle count
1834 }
1835
mmx_psubd_r64_rm64()1836 void i386_device::mmx_psubd_r64_rm64() // Opcode 0f fa
1837 {
1838 int n;
1839 MMXPROLOG();
1840 uint8_t modrm = FETCH();
1841 if( modrm >= 0xc0 ) {
1842 for (n=0;n < 2;n++)
1843 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - MMX(modrm & 7).d[n];
1844 } else {
1845 MMX_REG s;
1846 uint32_t ea = GetEA(modrm, 0);
1847 READMMX(ea, s);
1848 for (n=0;n < 2;n++)
1849 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] - s.d[n];
1850 }
1851 CYCLES(1); // TODO: correct cycle count
1852 }
1853
mmx_paddb_r64_rm64()1854 void i386_device::mmx_paddb_r64_rm64() // Opcode 0f fc
1855 {
1856 int n;
1857 MMXPROLOG();
1858 uint8_t modrm = FETCH();
1859 if( modrm >= 0xc0 ) {
1860 for (n=0;n < 8;n++)
1861 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + MMX(modrm & 7).b[n];
1862 } else {
1863 MMX_REG s;
1864 uint32_t ea = GetEA(modrm, 0);
1865 READMMX(ea, s);
1866 for (n=0;n < 8;n++)
1867 MMX((modrm >> 3) & 0x7).b[n]=MMX((modrm >> 3) & 0x7).b[n] + s.b[n];
1868 }
1869 CYCLES(1); // TODO: correct cycle count
1870 }
1871
mmx_paddw_r64_rm64()1872 void i386_device::mmx_paddw_r64_rm64() // Opcode 0f fd
1873 {
1874 int n;
1875 MMXPROLOG();
1876 uint8_t modrm = FETCH();
1877 if( modrm >= 0xc0 ) {
1878 for (n=0;n < 4;n++)
1879 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + MMX(modrm & 7).w[n];
1880 } else {
1881 MMX_REG s;
1882 uint32_t ea = GetEA(modrm, 0);
1883 READMMX(ea, s);
1884 for (n=0;n < 4;n++)
1885 MMX((modrm >> 3) & 0x7).w[n]=MMX((modrm >> 3) & 0x7).w[n] + s.w[n];
1886 }
1887 CYCLES(1); // TODO: correct cycle count
1888 }
1889
mmx_paddd_r64_rm64()1890 void i386_device::mmx_paddd_r64_rm64() // Opcode 0f fe
1891 {
1892 int n;
1893 MMXPROLOG();
1894 uint8_t modrm = FETCH();
1895 if( modrm >= 0xc0 ) {
1896 for (n=0;n < 2;n++)
1897 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + MMX(modrm & 7).d[n];
1898 } else {
1899 MMX_REG s;
1900 uint32_t ea = GetEA(modrm, 0);
1901 READMMX(ea, s);
1902 for (n=0;n < 2;n++)
1903 MMX((modrm >> 3) & 0x7).d[n]=MMX((modrm >> 3) & 0x7).d[n] + s.d[n];
1904 }
1905 CYCLES(1); // TODO: correct cycle count
1906 }
1907
mmx_emms()1908 void i386_device::mmx_emms() // Opcode 0f 77
1909 {
1910 m_x87_tw = 0xffff; // tag word = 0xffff
1911 // TODO
1912 CYCLES(1); // TODO: correct cycle count
1913 }
1914
i386_cyrix_svdc()1915 void i386_device::i386_cyrix_svdc() // Opcode 0f 78
1916 {
1917 uint8_t modrm = FETCH();
1918
1919 if( modrm < 0xc0 ) {
1920 uint32_t ea = GetEA(modrm,0);
1921 int index = (modrm >> 3) & 7;
1922 int limit;
1923 switch (index)
1924 {
1925 case 0:
1926 {
1927 index = ES;
1928 break;
1929 }
1930
1931 case 2:
1932 {
1933 index = SS;
1934 break;
1935 }
1936
1937 case 3:
1938 {
1939 index = DS;
1940 break;
1941 }
1942
1943 case 4:
1944 {
1945 index = FS;
1946 break;
1947 }
1948
1949 case 5:
1950 {
1951 index = GS;
1952 break;
1953 }
1954
1955 default:
1956 {
1957 i386_trap(6, 0, 0);
1958 }
1959 }
1960
1961 limit = m_sreg[index].limit;
1962
1963 if (m_sreg[index].flags & 0x8000) //G bit
1964 {
1965 limit >>= 12;
1966 }
1967
1968 WRITE16(ea + 0, limit);
1969 WRITE32(ea + 2, m_sreg[index].base);
1970 WRITE16(ea + 5, m_sreg[index].flags); //replace top 8 bits of base
1971 WRITE8(ea + 7, m_sreg[index].base >> 24);
1972 WRITE16(ea + 8, m_sreg[index].selector);
1973 } else {
1974 i386_trap(6, 0, 0);
1975 }
1976 CYCLES(1); // TODO: correct cycle count
1977 }
1978
i386_cyrix_rsdc()1979 void i386_device::i386_cyrix_rsdc() // Opcode 0f 79
1980 {
1981 uint8_t modrm = FETCH();
1982
1983 if( modrm < 0xc0 ) {
1984 uint32_t ea = GetEA(modrm,0);
1985 int index = (modrm >> 3) & 7;
1986 uint16_t flags;
1987 uint32_t base;
1988 uint32_t limit;
1989 switch (index)
1990 {
1991 case 0:
1992 {
1993 index = ES;
1994 break;
1995 }
1996
1997 case 2:
1998 {
1999 index = SS;
2000 break;
2001 }
2002
2003 case 3:
2004 {
2005 index = DS;
2006 break;
2007 }
2008
2009 case 4:
2010 {
2011 index = FS;
2012 break;
2013 }
2014
2015 case 5:
2016 {
2017 index = GS;
2018 break;
2019 }
2020
2021 default:
2022 {
2023 i386_trap(6, 0, 0);
2024 }
2025 }
2026
2027 base = (READ32(ea + 2) & 0x00ffffff) | (READ8(ea + 7) << 24);
2028 flags = READ16(ea + 5);
2029 limit = READ16(ea + 0) | ((flags & 3) << 16);
2030
2031 if (flags & 0x8000) //G bit
2032 {
2033 limit = (limit << 12) | 0xfff;
2034 }
2035
2036 m_sreg[index].selector = READ16(ea + 8);
2037 m_sreg[index].flags = flags;
2038 m_sreg[index].base = base;
2039 m_sreg[index].limit = limit;
2040 } else {
2041 i386_trap(6, 0, 0);
2042 }
2043 CYCLES(1); // TODO: correct cycle count
2044 }
2045
i386_cyrix_svldt()2046 void i386_device::i386_cyrix_svldt() // Opcode 0f 7a
2047 {
2048 if ( PROTECTED_MODE && !V8086_MODE )
2049 {
2050 uint8_t modrm = FETCH();
2051
2052 if( !(modrm & 0xf8) ) {
2053 uint32_t ea = GetEA(modrm,0);
2054 uint32_t limit = m_ldtr.limit;
2055
2056 if (m_ldtr.flags & 0x8000) //G bit
2057 {
2058 limit >>= 12;
2059 }
2060
2061 WRITE16(ea + 0, limit);
2062 WRITE32(ea + 2, m_ldtr.base);
2063 WRITE16(ea + 5, m_ldtr.flags); //replace top 8 bits of base
2064 WRITE8(ea + 7, m_ldtr.base >> 24);
2065 WRITE16(ea + 8, m_ldtr.segment);
2066 } else {
2067 i386_trap(6, 0, 0);
2068 }
2069 } else {
2070 i386_trap(6, 0, 0);
2071 }
2072 CYCLES(1); // TODO: correct cycle count
2073 }
2074
i386_cyrix_rsldt()2075 void i386_device::i386_cyrix_rsldt() // Opcode 0f 7b
2076 {
2077 if ( PROTECTED_MODE && !V8086_MODE )
2078 {
2079 if(m_CPL)
2080 FAULT(FAULT_GP,0)
2081
2082 uint8_t modrm = FETCH();
2083
2084 if( !(modrm & 0xf8) ) {
2085 uint32_t ea = GetEA(modrm,0);
2086 uint16_t flags = READ16(ea + 5);
2087 uint32_t base = (READ32(ea + 2) | 0x00ffffff) | (READ8(ea + 7) << 24);
2088 uint32_t limit = READ16(ea + 0) | ((flags & 3) << 16);
2089 I386_SREG seg;
2090
2091 if (flags & 0x8000) //G bit
2092 {
2093 limit = (limit << 12) | 0xfff;
2094 }
2095
2096 memset(&seg, 0, sizeof(seg));
2097 seg.selector = READ16(ea + 8);
2098 i386_load_protected_mode_segment(&seg,nullptr);
2099 m_ldtr.limit = limit;
2100 m_ldtr.base = base;
2101 m_ldtr.flags = flags;
2102 } else {
2103 i386_trap(6, 0, 0);
2104 }
2105 } else {
2106 i386_trap(6, 0, 0);
2107 }
2108 CYCLES(1); // TODO: correct cycle count
2109 }
2110
i386_cyrix_svts()2111 void i386_device::i386_cyrix_svts() // Opcode 0f 7c
2112 {
2113 if ( PROTECTED_MODE )
2114 {
2115 uint8_t modrm = FETCH();
2116
2117 if( !(modrm & 0xf8) ) {
2118 uint32_t ea = GetEA(modrm,0);
2119 uint32_t limit = m_task.limit;
2120
2121 if (m_task.flags & 0x8000) //G bit
2122 {
2123 limit >>= 12;
2124 }
2125
2126 WRITE16(ea + 0, limit);
2127 WRITE32(ea + 2, m_task.base);
2128 WRITE16(ea + 5, m_task.flags); //replace top 8 bits of base
2129 WRITE8(ea + 7, m_task.base >> 24);
2130 WRITE16(ea + 8, m_task.segment);
2131 } else {
2132 i386_trap(6, 0, 0);
2133 }
2134 } else {
2135 i386_trap(6, 0, 0);
2136 }
2137 }
2138
i386_cyrix_rsts()2139 void i386_device::i386_cyrix_rsts() // Opcode 0f 7d
2140 {
2141 if ( PROTECTED_MODE )
2142 {
2143 if(m_CPL)
2144 FAULT(FAULT_GP,0)
2145
2146 uint8_t modrm = FETCH();
2147
2148 if( !(modrm & 0xf8) ) {
2149 uint32_t ea = GetEA(modrm,0);
2150 uint16_t flags = READ16(ea + 5);
2151 uint32_t base = (READ32(ea + 2) | 0x00ffffff) | (READ8(ea + 7) << 24);
2152 uint32_t limit = READ16(ea + 0) | ((flags & 3) << 16);
2153
2154 if (flags & 0x8000) //G bit
2155 {
2156 limit = (limit << 12) | 0xfff;
2157 }
2158 m_task.segment = READ16(ea + 8);
2159 m_task.limit = limit;
2160 m_task.base = base;
2161 m_task.flags = flags;
2162 } else {
2163 i386_trap(6, 0, 0);
2164 }
2165 } else {
2166 i386_trap(6, 0, 0);
2167 }
2168 CYCLES(1); // TODO: correct cycle count
2169 }
2170
mmx_movd_r64_rm32()2171 void i386_device::mmx_movd_r64_rm32() // Opcode 0f 6e
2172 {
2173 MMXPROLOG();
2174 uint8_t modrm = FETCH();
2175 if( modrm >= 0xc0 ) {
2176 MMX((modrm >> 3) & 0x7).d[0]=LOAD_RM32(modrm);
2177 } else {
2178 uint32_t ea = GetEA(modrm, 0);
2179 MMX((modrm >> 3) & 0x7).d[0]=READ32(ea);
2180 }
2181 MMX((modrm >> 3) & 0x7).d[1]=0;
2182 CYCLES(1); // TODO: correct cycle count
2183 }
2184
mmx_movq_r64_rm64()2185 void i386_device::mmx_movq_r64_rm64() // Opcode 0f 6f
2186 {
2187 MMXPROLOG();
2188 uint8_t modrm = FETCH();
2189 if( modrm >= 0xc0 ) {
2190 MMX((modrm >> 3) & 0x7).l=MMX(modrm & 0x7).l;
2191 } else {
2192 uint32_t ea = GetEA(modrm, 0);
2193 READMMX(ea, MMX((modrm >> 3) & 0x7));
2194 }
2195 CYCLES(1); // TODO: correct cycle count
2196 }
2197
mmx_movd_rm32_r64()2198 void i386_device::mmx_movd_rm32_r64() // Opcode 0f 7e
2199 {
2200 MMXPROLOG();
2201 uint8_t modrm = FETCH();
2202 if( modrm >= 0xc0 ) {
2203 STORE_RM32(modrm, MMX((modrm >> 3) & 0x7).d[0]);
2204 } else {
2205 uint32_t ea = GetEA(modrm, 0);
2206 WRITE32(ea, MMX((modrm >> 3) & 0x7).d[0]);
2207 }
2208 CYCLES(1); // TODO: correct cycle count
2209 }
2210
mmx_movq_rm64_r64()2211 void i386_device::mmx_movq_rm64_r64() // Opcode 0f 7f
2212 {
2213 MMXPROLOG();
2214 uint8_t modrm = FETCH();
2215 if( modrm >= 0xc0 ) {
2216 MMX(modrm & 0x7)=MMX((modrm >> 3) & 0x7);
2217 } else {
2218 uint32_t ea = GetEA(modrm, 0);
2219 WRITEMMX(ea, MMX((modrm >> 3) & 0x7));
2220 }
2221 CYCLES(1); // TODO: correct cycle count
2222 }
2223
mmx_pcmpeqb_r64_rm64()2224 void i386_device::mmx_pcmpeqb_r64_rm64() // Opcode 0f 74
2225 {
2226 int c;
2227 MMXPROLOG();
2228 uint8_t modrm = FETCH();
2229 if( modrm >= 0xc0 ) {
2230 int s,d;
2231 s=modrm & 0x7;
2232 d=(modrm >> 3) & 0x7;
2233 for (c=0;c <= 7;c++)
2234 MMX(d).b[c]=(MMX(d).b[c] == MMX(s).b[c]) ? 0xff : 0;
2235 } else {
2236 MMX_REG s;
2237 int d=(modrm >> 3) & 0x7;
2238 uint32_t ea = GetEA(modrm, 0);
2239 READMMX(ea, s);
2240 for (c=0;c <= 7;c++)
2241 MMX(d).b[c]=(MMX(d).b[c] == s.b[c]) ? 0xff : 0;
2242 }
2243 CYCLES(1); // TODO: correct cycle count
2244 }
2245
mmx_pcmpeqw_r64_rm64()2246 void i386_device::mmx_pcmpeqw_r64_rm64() // Opcode 0f 75
2247 {
2248 MMXPROLOG();
2249 uint8_t modrm = FETCH();
2250 if( modrm >= 0xc0 ) {
2251 int s,d;
2252 s=modrm & 0x7;
2253 d=(modrm >> 3) & 0x7;
2254 MMX(d).w[0]=(MMX(d).w[0] == MMX(s).w[0]) ? 0xffff : 0;
2255 MMX(d).w[1]=(MMX(d).w[1] == MMX(s).w[1]) ? 0xffff : 0;
2256 MMX(d).w[2]=(MMX(d).w[2] == MMX(s).w[2]) ? 0xffff : 0;
2257 MMX(d).w[3]=(MMX(d).w[3] == MMX(s).w[3]) ? 0xffff : 0;
2258 } else {
2259 MMX_REG s;
2260 int d=(modrm >> 3) & 0x7;
2261 uint32_t ea = GetEA(modrm, 0);
2262 READMMX(ea, s);
2263 MMX(d).w[0]=(MMX(d).w[0] == s.w[0]) ? 0xffff : 0;
2264 MMX(d).w[1]=(MMX(d).w[1] == s.w[1]) ? 0xffff : 0;
2265 MMX(d).w[2]=(MMX(d).w[2] == s.w[2]) ? 0xffff : 0;
2266 MMX(d).w[3]=(MMX(d).w[3] == s.w[3]) ? 0xffff : 0;
2267 }
2268 CYCLES(1); // TODO: correct cycle count
2269 }
2270
mmx_pcmpeqd_r64_rm64()2271 void i386_device::mmx_pcmpeqd_r64_rm64() // Opcode 0f 76
2272 {
2273 MMXPROLOG();
2274 uint8_t modrm = FETCH();
2275 if( modrm >= 0xc0 ) {
2276 int s,d;
2277 s=modrm & 0x7;
2278 d=(modrm >> 3) & 0x7;
2279 MMX(d).d[0]=(MMX(d).d[0] == MMX(s).d[0]) ? 0xffffffff : 0;
2280 MMX(d).d[1]=(MMX(d).d[1] == MMX(s).d[1]) ? 0xffffffff : 0;
2281 } else {
2282 MMX_REG s;
2283 int d=(modrm >> 3) & 0x7;
2284 uint32_t ea = GetEA(modrm, 0);
2285 READMMX(ea, s);
2286 MMX(d).d[0]=(MMX(d).d[0] == s.d[0]) ? 0xffffffff : 0;
2287 MMX(d).d[1]=(MMX(d).d[1] == s.d[1]) ? 0xffffffff : 0;
2288 }
2289 CYCLES(1); // TODO: correct cycle count
2290 }
2291
mmx_pshufw_r64_rm64_i8()2292 void i386_device::mmx_pshufw_r64_rm64_i8() // Opcode 0f 70
2293 {
2294 MMXPROLOG();
2295 uint8_t modrm = FETCH();
2296 if( modrm >= 0xc0 ) {
2297 MMX_REG t;
2298 int s,d;
2299 uint8_t imm8 = FETCH();
2300 s=modrm & 0x7;
2301 d=(modrm >> 3) & 0x7;
2302 t.q=MMX(s).q;
2303 MMX(d).w[0]=t.w[imm8 & 3];
2304 MMX(d).w[1]=t.w[(imm8 >> 2) & 3];
2305 MMX(d).w[2]=t.w[(imm8 >> 4) & 3];
2306 MMX(d).w[3]=t.w[(imm8 >> 6) & 3];
2307 } else {
2308 MMX_REG s;
2309 int d=(modrm >> 3) & 0x7;
2310 uint32_t ea = GetEA(modrm, 0);
2311 uint8_t imm8 = FETCH();
2312 READMMX(ea, s);
2313 MMX(d).w[0]=s.w[imm8 & 3];
2314 MMX(d).w[1]=s.w[(imm8 >> 2) & 3];
2315 MMX(d).w[2]=s.w[(imm8 >> 4) & 3];
2316 MMX(d).w[3]=s.w[(imm8 >> 6) & 3];
2317 }
2318 CYCLES(1); // TODO: correct cycle count
2319 }
2320
sse_punpcklbw_r128_rm128()2321 void i386_device::sse_punpcklbw_r128_rm128() // Opcode 66 0f 60
2322 {
2323 uint8_t modrm = FETCH();
2324 if (modrm >= 0xc0) {
2325 XMM_REG xd,xs;
2326 int s, d;
2327 s = modrm & 0x7;
2328 d = (modrm >> 3) & 0x7;
2329 xd.l[0] = XMM(d).l[0];
2330 xs.l[0] = XMM(s).l[0];
2331 XMM(d).b[0] = xd.b[0];
2332 XMM(d).b[1] = xs.b[0];
2333 XMM(d).b[2] = xd.b[1];
2334 XMM(d).b[3] = xs.b[1];
2335 XMM(d).b[4] = xd.b[2];
2336 XMM(d).b[5] = xs.b[2];
2337 XMM(d).b[6] = xd.b[3];
2338 XMM(d).b[7] = xs.b[3];
2339 XMM(d).b[8] = xd.b[4];
2340 XMM(d).b[9] = xs.b[4];
2341 XMM(d).b[10] = xd.b[5];
2342 XMM(d).b[11] = xs.b[5];
2343 XMM(d).b[12] = xd.b[6];
2344 XMM(d).b[13] = xs.b[6];
2345 XMM(d).b[14] = xd.b[7];
2346 XMM(d).b[15] = xs.b[7];
2347 }
2348 else {
2349 XMM_REG xd, xs;
2350 int d = (modrm >> 3) & 0x7;
2351 uint32_t ea = GetEA(modrm, 0);
2352 xd.l[0] = XMM(d).l[0];
2353 xs.q[0] = READ64(ea);
2354 for (int n = 0; n < 8; n++) {
2355 XMM(d).b[n << 1] = xd.b[n];
2356 XMM(d).b[(n << 1) | 1] = xs.b[n];
2357 }
2358 }
2359 CYCLES(1); // TODO: correct cycle count
2360 }
2361
sse_punpcklwd_r128_rm128()2362 void i386_device::sse_punpcklwd_r128_rm128()
2363 {
2364 uint8_t modrm = FETCH();
2365 if (modrm >= 0xc0) {
2366 XMM_REG xd, xs;
2367 int s, d;
2368 s = modrm & 0x7;
2369 d = (modrm >> 3) & 0x7;
2370 xd.l[0] = XMM(d).l[0];
2371 xs.l[0] = XMM(s).l[0];
2372 for (int n = 0; n < 4; n++) {
2373 XMM(d).w[n << 1] = xd.w[n];
2374 XMM(d).w[(n << 1) | 1] = xs.w[n];
2375 }
2376 }
2377 else {
2378 XMM_REG xd, xs;
2379 int d = (modrm >> 3) & 0x7;
2380 uint32_t ea = GetEA(modrm, 0);
2381 xd.l[0] = XMM(d).l[0];
2382 xs.q[0] = READ64(ea);
2383 for (int n = 0; n < 4; n++) {
2384 XMM(d).w[n << 1] = xd.w[n];
2385 XMM(d).w[(n << 1) | 1] = xs.w[n];
2386 }
2387 }
2388 CYCLES(1); // TODO: correct cycle count
2389 }
2390
sse_punpckldq_r128_rm128()2391 void i386_device::sse_punpckldq_r128_rm128()
2392 {
2393 uint8_t modrm = FETCH();
2394 if (modrm >= 0xc0) {
2395 XMM_REG xd, xs;
2396 int s, d;
2397 s = modrm & 0x7;
2398 d = (modrm >> 3) & 0x7;
2399 xd.l[0] = XMM(d).l[0];
2400 xs.l[0] = XMM(s).l[0];
2401 for (int n = 0; n < 2; n++) {
2402 XMM(d).d[n << 1] = xd.d[n];
2403 XMM(d).d[(n << 1) | 1] = xs.d[n];
2404 }
2405 }
2406 else {
2407 XMM_REG xd, xs;
2408 int d = (modrm >> 3) & 0x7;
2409 uint32_t ea = GetEA(modrm, 0);
2410 xd.l[0] = XMM(d).l[0];
2411 xs.q[0] = READ64(ea);
2412 for (int n = 0; n < 2; n++) {
2413 XMM(d).d[n << 1] = xd.d[n];
2414 XMM(d).d[(n << 1) | 1] = xs.d[n];
2415 }
2416 }
2417 CYCLES(1); // TODO: correct cycle count
2418 }
2419
sse_punpcklqdq_r128_rm128()2420 void i386_device::sse_punpcklqdq_r128_rm128()
2421 {
2422 uint8_t modrm = FETCH();
2423 if (modrm >= 0xc0) {
2424 XMM_REG xd, xs;
2425 int s, d;
2426 s = modrm & 0x7;
2427 d = (modrm >> 3) & 0x7;
2428 xd.l[0] = XMM(d).l[0];
2429 xs.l[0] = XMM(s).l[0];
2430 XMM(d).q[0] = xd.q[0];
2431 XMM(d).q[1] = xs.q[0];
2432 }
2433 else {
2434 XMM_REG xd, xs;
2435 int d = (modrm >> 3) & 0x7;
2436 uint32_t ea = GetEA(modrm, 0);
2437 xd.l[0] = XMM(d).l[0];
2438 xs.q[0] = READ64(ea);
2439 XMM(d).q[0] = xd.q[0];
2440 XMM(d).q[1] = xs.q[0];
2441 }
2442 CYCLES(1); // TODO: correct cycle count
2443 }
2444
mmx_punpcklbw_r64_r64m32()2445 void i386_device::mmx_punpcklbw_r64_r64m32() // Opcode 0f 60
2446 {
2447 MMXPROLOG();
2448 uint8_t modrm = FETCH();
2449 if( modrm >= 0xc0 ) {
2450 uint32_t t;
2451 int s,d;
2452 s=modrm & 0x7;
2453 d=(modrm >> 3) & 0x7;
2454 t=MMX(d).d[0];
2455 MMX(d).b[0]=t & 0xff;
2456 MMX(d).b[1]=MMX(s).b[0];
2457 MMX(d).b[2]=(t >> 8) & 0xff;
2458 MMX(d).b[3]=MMX(s).b[1];
2459 MMX(d).b[4]=(t >> 16) & 0xff;
2460 MMX(d).b[5]=MMX(s).b[2];
2461 MMX(d).b[6]=(t >> 24) & 0xff;
2462 MMX(d).b[7]=MMX(s).b[3];
2463 } else {
2464 uint32_t s,t;
2465 int d=(modrm >> 3) & 0x7;
2466 uint32_t ea = GetEA(modrm, 0);
2467 s = READ32(ea);
2468 t=MMX(d).d[0];
2469 MMX(d).b[0]=t & 0xff;
2470 MMX(d).b[1]=s & 0xff;
2471 MMX(d).b[2]=(t >> 8) & 0xff;
2472 MMX(d).b[3]=(s >> 8) & 0xff;
2473 MMX(d).b[4]=(t >> 16) & 0xff;
2474 MMX(d).b[5]=(s >> 16) & 0xff;
2475 MMX(d).b[6]=(t >> 24) & 0xff;
2476 MMX(d).b[7]=(s >> 24) & 0xff;
2477 }
2478 CYCLES(1); // TODO: correct cycle count
2479 }
2480
mmx_punpcklwd_r64_r64m32()2481 void i386_device::mmx_punpcklwd_r64_r64m32() // Opcode 0f 61
2482 {
2483 MMXPROLOG();
2484 uint8_t modrm = FETCH();
2485 if( modrm >= 0xc0 ) {
2486 uint16_t t;
2487 int s,d;
2488 s=modrm & 0x7;
2489 d=(modrm >> 3) & 0x7;
2490 t=MMX(d).w[1];
2491 MMX(d).w[0]=MMX(d).w[0];
2492 MMX(d).w[1]=MMX(s).w[0];
2493 MMX(d).w[2]=t;
2494 MMX(d).w[3]=MMX(s).w[1];
2495 } else {
2496 uint32_t s;
2497 uint16_t t;
2498 int d=(modrm >> 3) & 0x7;
2499 uint32_t ea = GetEA(modrm, 0);
2500 s = READ32(ea);
2501 t=MMX(d).w[1];
2502 MMX(d).w[0]=MMX(d).w[0];
2503 MMX(d).w[1]=s & 0xffff;
2504 MMX(d).w[2]=t;
2505 MMX(d).w[3]=(s >> 16) & 0xffff;
2506 }
2507 CYCLES(1); // TODO: correct cycle count
2508 }
2509
mmx_punpckldq_r64_r64m32()2510 void i386_device::mmx_punpckldq_r64_r64m32() // Opcode 0f 62
2511 {
2512 MMXPROLOG();
2513 uint8_t modrm = FETCH();
2514 if( modrm >= 0xc0 ) {
2515 int s,d;
2516 s=modrm & 0x7;
2517 d=(modrm >> 3) & 0x7;
2518 MMX(d).d[0]=MMX(d).d[0];
2519 MMX(d).d[1]=MMX(s).d[0];
2520 } else {
2521 uint32_t s;
2522 int d=(modrm >> 3) & 0x7;
2523 uint32_t ea = GetEA(modrm, 0);
2524 s = READ32(ea);
2525 MMX(d).d[0]=MMX(d).d[0];
2526 MMX(d).d[1]=s;
2527 }
2528 CYCLES(1); // TODO: correct cycle count
2529 }
2530
mmx_packsswb_r64_rm64()2531 void i386_device::mmx_packsswb_r64_rm64() // Opcode 0f 63
2532 {
2533 MMXPROLOG();
2534 uint8_t modrm = FETCH();
2535 if( modrm >= 0xc0 ) {
2536 int s,d;
2537 s=modrm & 0x7;
2538 d=(modrm >> 3) & 0x7;
2539 MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]);
2540 MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]);
2541 MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]);
2542 MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]);
2543 MMX(d).c[4]=SaturatedSignedWordToSignedByte(MMX(s).s[0]);
2544 MMX(d).c[5]=SaturatedSignedWordToSignedByte(MMX(s).s[1]);
2545 MMX(d).c[6]=SaturatedSignedWordToSignedByte(MMX(s).s[2]);
2546 MMX(d).c[7]=SaturatedSignedWordToSignedByte(MMX(s).s[3]);
2547 } else {
2548 MMX_REG s;
2549 int d=(modrm >> 3) & 0x7;
2550 uint32_t ea = GetEA(modrm, 0);
2551 READMMX(ea, s);
2552 MMX(d).c[0]=SaturatedSignedWordToSignedByte(MMX(d).s[0]);
2553 MMX(d).c[1]=SaturatedSignedWordToSignedByte(MMX(d).s[1]);
2554 MMX(d).c[2]=SaturatedSignedWordToSignedByte(MMX(d).s[2]);
2555 MMX(d).c[3]=SaturatedSignedWordToSignedByte(MMX(d).s[3]);
2556 MMX(d).c[4]=SaturatedSignedWordToSignedByte(s.s[0]);
2557 MMX(d).c[5]=SaturatedSignedWordToSignedByte(s.s[1]);
2558 MMX(d).c[6]=SaturatedSignedWordToSignedByte(s.s[2]);
2559 MMX(d).c[7]=SaturatedSignedWordToSignedByte(s.s[3]);
2560 }
2561 CYCLES(1); // TODO: correct cycle count
2562 }
2563
mmx_pcmpgtb_r64_rm64()2564 void i386_device::mmx_pcmpgtb_r64_rm64() // Opcode 0f 64
2565 {
2566 int c;
2567 MMXPROLOG();
2568 uint8_t modrm = FETCH();
2569 if( modrm >= 0xc0 ) {
2570 int s,d;
2571 s=modrm & 0x7;
2572 d=(modrm >> 3) & 0x7;
2573 for (c=0;c <= 7;c++)
2574 MMX(d).b[c]=(MMX(d).c[c] > MMX(s).c[c]) ? 0xff : 0;
2575 } else {
2576 MMX_REG s;
2577 int d=(modrm >> 3) & 0x7;
2578 uint32_t ea = GetEA(modrm, 0);
2579 READMMX(ea, s);
2580 for (c=0;c <= 7;c++)
2581 MMX(d).b[c]=(MMX(d).c[c] > s.c[c]) ? 0xff : 0;
2582 }
2583 CYCLES(1); // TODO: correct cycle count
2584 }
2585
mmx_pcmpgtw_r64_rm64()2586 void i386_device::mmx_pcmpgtw_r64_rm64() // Opcode 0f 65
2587 {
2588 int c;
2589 MMXPROLOG();
2590 uint8_t modrm = FETCH();
2591 if( modrm >= 0xc0 ) {
2592 int s,d;
2593 s=modrm & 0x7;
2594 d=(modrm >> 3) & 0x7;
2595 for (c=0;c <= 3;c++)
2596 MMX(d).w[c]=(MMX(d).s[c] > MMX(s).s[c]) ? 0xffff : 0;
2597 } else {
2598 MMX_REG s;
2599 int d=(modrm >> 3) & 0x7;
2600 uint32_t ea = GetEA(modrm, 0);
2601 READMMX(ea, s);
2602 for (c=0;c <= 3;c++)
2603 MMX(d).w[c]=(MMX(d).s[c] > s.s[c]) ? 0xffff : 0;
2604 }
2605 CYCLES(1); // TODO: correct cycle count
2606 }
2607
mmx_pcmpgtd_r64_rm64()2608 void i386_device::mmx_pcmpgtd_r64_rm64() // Opcode 0f 66
2609 {
2610 int c;
2611 MMXPROLOG();
2612 uint8_t modrm = FETCH();
2613 if( modrm >= 0xc0 ) {
2614 int s,d;
2615 s=modrm & 0x7;
2616 d=(modrm >> 3) & 0x7;
2617 for (c=0;c <= 1;c++)
2618 MMX(d).d[c]=(MMX(d).i[c] > MMX(s).i[c]) ? 0xffffffff : 0;
2619 } else {
2620 MMX_REG s;
2621 int d=(modrm >> 3) & 0x7;
2622 uint32_t ea = GetEA(modrm, 0);
2623 READMMX(ea, s);
2624 for (c=0;c <= 1;c++)
2625 MMX(d).d[c]=(MMX(d).i[c] > s.i[c]) ? 0xffffffff : 0;
2626 }
2627 CYCLES(1); // TODO: correct cycle count
2628 }
2629
mmx_packuswb_r64_rm64()2630 void i386_device::mmx_packuswb_r64_rm64() // Opcode 0f 67
2631 {
2632 MMXPROLOG();
2633 uint8_t modrm = FETCH();
2634 if( modrm >= 0xc0 ) {
2635 MMX_REG ds, sd;
2636 int s,d;
2637 s=modrm & 0x7;
2638 d=(modrm >> 3) & 0x7;
2639 ds.q = MMX(d).q;
2640 sd.q = MMX(s).q;
2641 MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(ds.s[0]);
2642 MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(ds.s[1]);
2643 MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(ds.s[2]);
2644 MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(ds.s[3]);
2645 MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(sd.s[0]);
2646 MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(sd.s[1]);
2647 MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(sd.s[2]);
2648 MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(sd.s[3]);
2649 } else {
2650 MMX_REG s,t;
2651 int d=(modrm >> 3) & 0x7;
2652 uint32_t ea = GetEA(modrm, 0);
2653 READMMX(ea, s);
2654 t.q = MMX(d).q;
2655 MMX(d).b[0]=SaturatedSignedWordToUnsignedByte(t.s[0]);
2656 MMX(d).b[1]=SaturatedSignedWordToUnsignedByte(t.s[1]);
2657 MMX(d).b[2]=SaturatedSignedWordToUnsignedByte(t.s[2]);
2658 MMX(d).b[3]=SaturatedSignedWordToUnsignedByte(t.s[3]);
2659 MMX(d).b[4]=SaturatedSignedWordToUnsignedByte(s.s[0]);
2660 MMX(d).b[5]=SaturatedSignedWordToUnsignedByte(s.s[1]);
2661 MMX(d).b[6]=SaturatedSignedWordToUnsignedByte(s.s[2]);
2662 MMX(d).b[7]=SaturatedSignedWordToUnsignedByte(s.s[3]);
2663 }
2664 CYCLES(1); // TODO: correct cycle count
2665 }
2666
mmx_punpckhbw_r64_rm64()2667 void i386_device::mmx_punpckhbw_r64_rm64() // Opcode 0f 68
2668 {
2669 MMXPROLOG();
2670 uint8_t modrm = FETCH();
2671 if( modrm >= 0xc0 ) {
2672 int s,d;
2673 s=modrm & 0x7;
2674 d=(modrm >> 3) & 0x7;
2675 MMX(d).b[0]=MMX(d).b[4];
2676 MMX(d).b[1]=MMX(s).b[4];
2677 MMX(d).b[2]=MMX(d).b[5];
2678 MMX(d).b[3]=MMX(s).b[5];
2679 MMX(d).b[4]=MMX(d).b[6];
2680 MMX(d).b[5]=MMX(s).b[6];
2681 MMX(d).b[6]=MMX(d).b[7];
2682 MMX(d).b[7]=MMX(s).b[7];
2683 } else {
2684 MMX_REG s;
2685 int d=(modrm >> 3) & 0x7;
2686 uint32_t ea = GetEA(modrm, 0);
2687 READMMX(ea, s);
2688 MMX(d).b[0]=MMX(d).b[4];
2689 MMX(d).b[1]=s.b[4];
2690 MMX(d).b[2]=MMX(d).b[5];
2691 MMX(d).b[3]=s.b[5];
2692 MMX(d).b[4]=MMX(d).b[6];
2693 MMX(d).b[5]=s.b[6];
2694 MMX(d).b[6]=MMX(d).b[7];
2695 MMX(d).b[7]=s.b[7];
2696 }
2697 CYCLES(1); // TODO: correct cycle count
2698 }
2699
mmx_punpckhwd_r64_rm64()2700 void i386_device::mmx_punpckhwd_r64_rm64() // Opcode 0f 69
2701 {
2702 MMXPROLOG();
2703 uint8_t modrm = FETCH();
2704 if( modrm >= 0xc0 ) {
2705 int s,d;
2706 s=modrm & 0x7;
2707 d=(modrm >> 3) & 0x7;
2708 MMX(d).w[0]=MMX(d).w[2];
2709 MMX(d).w[1]=MMX(s).w[2];
2710 MMX(d).w[2]=MMX(d).w[3];
2711 MMX(d).w[3]=MMX(s).w[3];
2712 } else {
2713 MMX_REG s;
2714 int d=(modrm >> 3) & 0x7;
2715 uint32_t ea = GetEA(modrm, 0);
2716 READMMX(ea, s);
2717 MMX(d).w[0]=MMX(d).w[2];
2718 MMX(d).w[1]=s.w[2];
2719 MMX(d).w[2]=MMX(d).w[3];
2720 MMX(d).w[3]=s.w[3];
2721 }
2722 CYCLES(1); // TODO: correct cycle count
2723 }
2724
mmx_punpckhdq_r64_rm64()2725 void i386_device::mmx_punpckhdq_r64_rm64() // Opcode 0f 6a
2726 {
2727 MMXPROLOG();
2728 uint8_t modrm = FETCH();
2729 if( modrm >= 0xc0 ) {
2730 int s,d;
2731 s=modrm & 0x7;
2732 d=(modrm >> 3) & 0x7;
2733 MMX(d).d[0]=MMX(d).d[1];
2734 MMX(d).d[1]=MMX(s).d[1];
2735 } else {
2736 MMX_REG s;
2737 int d=(modrm >> 3) & 0x7;
2738 uint32_t ea = GetEA(modrm, 0);
2739 READMMX(ea, s);
2740 MMX(d).d[0]=MMX(d).d[1];
2741 MMX(d).d[1]=s.d[1];
2742 }
2743 CYCLES(1); // TODO: correct cycle count
2744 }
2745
mmx_packssdw_r64_rm64()2746 void i386_device::mmx_packssdw_r64_rm64() // Opcode 0f 6b
2747 {
2748 MMXPROLOG();
2749 uint8_t modrm = FETCH();
2750 if( modrm >= 0xc0 ) {
2751 int s,d;
2752 int32_t t1, t2, t3, t4;
2753 s=modrm & 0x7;
2754 d=(modrm >> 3) & 0x7;
2755 t1 = MMX(d).i[0];
2756 t2 = MMX(d).i[1];
2757 t3 = MMX(s).i[0];
2758 t4 = MMX(s).i[1];
2759 MMX(d).s[0] = SaturatedSignedDwordToSignedWord(t1);
2760 MMX(d).s[1] = SaturatedSignedDwordToSignedWord(t2);
2761 MMX(d).s[2] = SaturatedSignedDwordToSignedWord(t3);
2762 MMX(d).s[3] = SaturatedSignedDwordToSignedWord(t4);
2763 }
2764 else {
2765 MMX_REG s;
2766 int32_t t1, t2;
2767 int d=(modrm >> 3) & 0x7;
2768 uint32_t ea = GetEA(modrm, 0);
2769 READMMX(ea, s);
2770 t1 = MMX(d).i[0];
2771 t2 = MMX(d).i[1];
2772 MMX(d).s[0] = SaturatedSignedDwordToSignedWord(t1);
2773 MMX(d).s[1] = SaturatedSignedDwordToSignedWord(t2);
2774 MMX(d).s[2] = SaturatedSignedDwordToSignedWord(s.i[0]);
2775 MMX(d).s[3] = SaturatedSignedDwordToSignedWord(s.i[1]);
2776 }
2777 CYCLES(1); // TODO: correct cycle count
2778 }
2779
sse_group_0fae()2780 void i386_device::sse_group_0fae() // Opcode 0f ae
2781 {
2782 uint8_t modm = FETCH();
2783 if( modm == 0xf8 ) {
2784 logerror("Unemulated SFENCE opcode called\n");
2785 CYCLES(1); // sfence instruction
2786 } else if( modm == 0xf0 ) {
2787 CYCLES(1); // mfence instruction
2788 } else if( modm == 0xe8 ) {
2789 CYCLES(1); // lfence instruction
2790 } else if( modm < 0xc0 ) {
2791 uint32_t ea;
2792 switch ( (modm & 0x38) >> 3 )
2793 {
2794 case 2: // ldmxcsr m32
2795 ea = GetEA(modm, 0);
2796 m_mxcsr = READ32(ea);
2797 break;
2798 case 3: // stmxcsr m32
2799 ea = GetEA(modm, 0);
2800 WRITE32(ea, m_mxcsr);
2801 break;
2802 case 7: // clflush m8
2803 GetNonTranslatedEA(modm, nullptr);
2804 break;
2805 default:
2806 report_invalid_modrm("sse_group_0fae", modm);
2807 }
2808 } else {
2809 report_invalid_modrm("sse_group_0fae", modm);
2810 }
2811 }
2812
sse_cvttps2dq_r128_rm128()2813 void i386_device::sse_cvttps2dq_r128_rm128() // Opcode f3 0f 5b
2814 {
2815 uint8_t modrm = FETCH();
2816 if( modrm >= 0xc0 ) {
2817 XMM((modrm >> 3) & 0x7).i[0]=(int32_t)XMM(modrm & 0x7).f[0];
2818 XMM((modrm >> 3) & 0x7).i[1]=(int32_t)XMM(modrm & 0x7).f[1];
2819 XMM((modrm >> 3) & 0x7).i[2]=(int32_t)XMM(modrm & 0x7).f[2];
2820 XMM((modrm >> 3) & 0x7).i[3]=(int32_t)XMM(modrm & 0x7).f[3];
2821 } else {
2822 XMM_REG src;
2823 uint32_t ea = GetEA(modrm, 0);
2824 READXMM(ea, src);
2825 XMM((modrm >> 3) & 0x7).i[0]=(int32_t)src.f[0];
2826 XMM((modrm >> 3) & 0x7).i[1]=(int32_t)src.f[1];
2827 XMM((modrm >> 3) & 0x7).i[2]=(int32_t)src.f[2];
2828 XMM((modrm >> 3) & 0x7).i[3]=(int32_t)src.f[3];
2829 }
2830 CYCLES(1); // TODO: correct cycle count
2831 }
2832
sse_cvtss2sd_r128_r128m32()2833 void i386_device::sse_cvtss2sd_r128_r128m32() // Opcode f3 0f 5a
2834 {
2835 uint8_t modrm = FETCH();
2836 if( modrm >= 0xc0 ) {
2837 XMM((modrm >> 3) & 0x7).f64[0] = XMM(modrm & 0x7).f[0];
2838 } else {
2839 XMM_REG s;
2840 uint32_t ea = GetEA(modrm, 0);
2841 s.d[0] = READ32(ea);
2842 XMM((modrm >> 3) & 0x7).f64[0] = s.f[0];
2843 }
2844 CYCLES(1); // TODO: correct cycle count
2845 }
2846
sse_cvttss2si_r32_r128m32()2847 void i386_device::sse_cvttss2si_r32_r128m32() // Opcode f3 0f 2c
2848 {
2849 int32_t src;
2850 uint8_t modrm = FETCH(); // get mordm byte
2851 if( modrm >= 0xc0 ) { // if bits 7-6 are 11 the source is a xmm register (low doubleword)
2852 src = (int32_t)XMM(modrm & 0x7).f[0^NATIVE_ENDIAN_VALUE_LE_BE(0,1)];
2853 } else { // otherwise is a memory address
2854 XMM_REG t;
2855 uint32_t ea = GetEA(modrm, 0);
2856 t.d[0] = READ32(ea);
2857 src = (int32_t)t.f[0];
2858 }
2859 STORE_REG32(modrm, (uint32_t)src);
2860 CYCLES(1); // TODO: correct cycle count
2861 }
2862
sse_cvtss2si_r32_r128m32()2863 void i386_device::sse_cvtss2si_r32_r128m32() // Opcode f3 0f 2d
2864 {
2865 int32_t src;
2866 uint8_t modrm = FETCH();
2867 if( modrm >= 0xc0 ) {
2868 src = (int32_t)XMM(modrm & 0x7).f[0];
2869 } else {
2870 XMM_REG t;
2871 uint32_t ea = GetEA(modrm, 0);
2872 t.d[0] = READ32(ea);
2873 src = (int32_t)t.f[0];
2874 }
2875 STORE_REG32(modrm, (uint32_t)src);
2876 CYCLES(1); // TODO: correct cycle count
2877 }
2878
sse_cvtsi2ss_r128_rm32()2879 void i386_device::sse_cvtsi2ss_r128_rm32() // Opcode f3 0f 2a
2880 {
2881 uint8_t modrm = FETCH();
2882 if( modrm >= 0xc0 ) {
2883 XMM((modrm >> 3) & 0x7).f[0] = (int32_t)LOAD_RM32(modrm);
2884 } else {
2885 uint32_t ea = GetEA(modrm, 0);
2886 XMM((modrm >> 3) & 0x7).f[0] = (int32_t)READ32(ea);
2887 }
2888 CYCLES(1); // TODO: correct cycle count
2889 }
2890
sse_cvtpi2ps_r128_rm64()2891 void i386_device::sse_cvtpi2ps_r128_rm64() // Opcode 0f 2a
2892 {
2893 uint8_t modrm = FETCH();
2894 MMXPROLOG();
2895 if( modrm >= 0xc0 ) {
2896 XMM((modrm >> 3) & 0x7).f[0] = (float)MMX(modrm & 0x7).i[0];
2897 XMM((modrm >> 3) & 0x7).f[1] = (float)MMX(modrm & 0x7).i[1];
2898 } else {
2899 MMX_REG r;
2900 uint32_t ea = GetEA(modrm, 0);
2901 READMMX(ea, r);
2902 XMM((modrm >> 3) & 0x7).f[0] = (float)r.i[0];
2903 XMM((modrm >> 3) & 0x7).f[1] = (float)r.i[1];
2904 }
2905 CYCLES(1); // TODO: correct cycle count
2906 }
2907
sse_cvttps2pi_r64_r128m64()2908 void i386_device::sse_cvttps2pi_r64_r128m64() // Opcode 0f 2c
2909 {
2910 uint8_t modrm = FETCH();
2911 MMXPROLOG();
2912 if( modrm >= 0xc0 ) {
2913 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
2914 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
2915 } else {
2916 XMM_REG r;
2917 uint32_t ea = GetEA(modrm, 0);
2918 READXMM(ea, r);
2919 XMM((modrm >> 3) & 0x7).i[0] = r.f[0];
2920 XMM((modrm >> 3) & 0x7).i[1] = r.f[1];
2921 }
2922 CYCLES(1); // TODO: correct cycle count
2923 }
2924
sse_cvtps2pi_r64_r128m64()2925 void i386_device::sse_cvtps2pi_r64_r128m64() // Opcode 0f 2d
2926 {
2927 uint8_t modrm = FETCH();
2928 MMXPROLOG();
2929 if( modrm >= 0xc0 ) {
2930 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
2931 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
2932 } else {
2933 XMM_REG r;
2934 uint32_t ea = GetEA(modrm, 0);
2935 READXMM(ea, r);
2936 XMM((modrm >> 3) & 0x7).i[0] = r.f[0];
2937 XMM((modrm >> 3) & 0x7).i[1] = r.f[1];
2938 }
2939 CYCLES(1); // TODO: correct cycle count
2940 }
2941
sse_cvtps2pd_r128_r128m64()2942 void i386_device::sse_cvtps2pd_r128_r128m64() // Opcode 0f 5a
2943 {
2944 uint8_t modrm = FETCH();
2945 if( modrm >= 0xc0 ) {
2946 XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).f[0];
2947 XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).f[1];
2948 } else {
2949 MMX_REG r;
2950 uint32_t ea = GetEA(modrm, 0);
2951 READMMX(ea, r);
2952 XMM((modrm >> 3) & 0x7).f64[0] = (double)r.f[0];
2953 XMM((modrm >> 3) & 0x7).f64[1] = (double)r.f[1];
2954 }
2955 CYCLES(1); // TODO: correct cycle count
2956 }
2957
sse_cvtdq2ps_r128_rm128()2958 void i386_device::sse_cvtdq2ps_r128_rm128() // Opcode 0f 5b
2959 {
2960 uint8_t modrm = FETCH();
2961 if( modrm >= 0xc0 ) {
2962 XMM((modrm >> 3) & 0x7).f[0] = (float)XMM(modrm & 0x7).i[0];
2963 XMM((modrm >> 3) & 0x7).f[1] = (float)XMM(modrm & 0x7).i[1];
2964 XMM((modrm >> 3) & 0x7).f[2] = (float)XMM(modrm & 0x7).i[2];
2965 XMM((modrm >> 3) & 0x7).f[3] = (float)XMM(modrm & 0x7).i[3];
2966 } else {
2967 XMM_REG r;
2968 uint32_t ea = GetEA(modrm, 0);
2969 READXMM(ea, r);
2970 XMM((modrm >> 3) & 0x7).f[0] = (float)r.i[0];
2971 XMM((modrm >> 3) & 0x7).f[1] = (float)r.i[1];
2972 XMM((modrm >> 3) & 0x7).f[2] = (float)r.i[2];
2973 XMM((modrm >> 3) & 0x7).f[3] = (float)r.i[3];
2974 }
2975 CYCLES(1); // TODO: correct cycle count
2976 }
2977
sse_cvtdq2pd_r128_r128m64()2978 void i386_device::sse_cvtdq2pd_r128_r128m64() // Opcode f3 0f e6
2979 {
2980 uint8_t modrm = FETCH();
2981 if( modrm >= 0xc0 ) {
2982 XMM((modrm >> 3) & 0x7).f64[0] = (double)XMM(modrm & 0x7).i[0];
2983 XMM((modrm >> 3) & 0x7).f64[1] = (double)XMM(modrm & 0x7).i[1];
2984 } else {
2985 MMX_REG s;
2986 uint32_t ea = GetEA(modrm, 0);
2987 READMMX(ea, s);
2988 XMM((modrm >> 3) & 0x7).f64[0] = (double)s.i[0];
2989 XMM((modrm >> 3) & 0x7).f64[1] = (double)s.i[1];
2990 }
2991 CYCLES(1); // TODO: correct cycle count
2992 }
2993
sse_movss_r128_rm128()2994 void i386_device::sse_movss_r128_rm128() // Opcode f3 0f 10
2995 {
2996 uint8_t modrm = FETCH();
2997 if( modrm >= 0xc0 ) {
2998 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0];
2999 } else {
3000 uint32_t ea = GetEA(modrm, 0);
3001 XMM((modrm >> 3) & 0x7).d[0] = READ32(ea);
3002 }
3003 CYCLES(1); // TODO: correct cycle count
3004 }
3005
sse_movss_rm128_r128()3006 void i386_device::sse_movss_rm128_r128() // Opcode f3 0f 11
3007 {
3008 uint8_t modrm = FETCH();
3009 if( modrm >= 0xc0 ) {
3010 XMM(modrm & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0];
3011 } else {
3012 uint32_t ea = GetEA(modrm, 0);
3013 WRITE32(ea, XMM((modrm >> 3) & 0x7).d[0]);
3014 }
3015 CYCLES(1); // TODO: correct cycle count
3016 }
3017
sse_movsldup_r128_rm128()3018 void i386_device::sse_movsldup_r128_rm128() // Opcode f3 0f 12
3019 {
3020 uint8_t modrm = FETCH();
3021 if( modrm >= 0xc0 ) {
3022 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[0];
3023 XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[0];
3024 XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[2];
3025 XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[2];
3026 } else {
3027 XMM_REG src;
3028 uint32_t ea = GetEA(modrm, 0);
3029 READXMM(ea, src);
3030 XMM((modrm >> 3) & 0x7).d[0] = src.d[0];
3031 XMM((modrm >> 3) & 0x7).d[1] = src.d[0];
3032 XMM((modrm >> 3) & 0x7).d[2] = src.d[2];
3033 XMM((modrm >> 3) & 0x7).d[3] = src.d[2];
3034 }
3035 CYCLES(1); // TODO: correct cycle count
3036 }
3037
sse_movshdup_r128_rm128()3038 void i386_device::sse_movshdup_r128_rm128() // Opcode f3 0f 16
3039 {
3040 uint8_t modrm = FETCH();
3041 if( modrm >= 0xc0 ) {
3042 XMM((modrm >> 3) & 0x7).d[0] = XMM(modrm & 0x7).d[1];
3043 XMM((modrm >> 3) & 0x7).d[1] = XMM(modrm & 0x7).d[1];
3044 XMM((modrm >> 3) & 0x7).d[2] = XMM(modrm & 0x7).d[3];
3045 XMM((modrm >> 3) & 0x7).d[3] = XMM(modrm & 0x7).d[3];
3046 } else {
3047 XMM_REG src;
3048 uint32_t ea = GetEA(modrm, 0);
3049 READXMM(ea, src);
3050 XMM((modrm >> 3) & 0x7).d[0] = src.d[1];
3051 XMM((modrm >> 3) & 0x7).d[1] = src.d[1];
3052 XMM((modrm >> 3) & 0x7).d[2] = src.d[3];
3053 XMM((modrm >> 3) & 0x7).d[3] = src.d[3];
3054 }
3055 CYCLES(1); // TODO: correct cycle count
3056 }
3057
sse_movaps_r128_rm128()3058 void i386_device::sse_movaps_r128_rm128() // Opcode 0f 28
3059 {
3060 uint8_t modrm = FETCH();
3061 if( modrm >= 0xc0 ) {
3062 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
3063 } else {
3064 uint32_t ea = GetEA(modrm, 0);
3065 READXMM(ea, XMM((modrm >> 3) & 0x7));
3066 }
3067 CYCLES(1); // TODO: correct cycle count
3068 }
3069
sse_movaps_rm128_r128()3070 void i386_device::sse_movaps_rm128_r128() // Opcode 0f 29
3071 {
3072 uint8_t modrm = FETCH();
3073 if( modrm >= 0xc0 ) {
3074 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
3075 } else {
3076 uint32_t ea = GetEA(modrm, 0);
3077 WRITEXMM(ea, XMM((modrm >> 3) & 0x7));
3078 }
3079 CYCLES(1); // TODO: correct cycle count
3080 }
3081
sse_movups_r128_rm128()3082 void i386_device::sse_movups_r128_rm128() // Opcode 0f 10
3083 {
3084 uint8_t modrm = FETCH();
3085 if( modrm >= 0xc0 ) {
3086 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
3087 } else {
3088 uint32_t ea = GetEA(modrm, 0);
3089 READXMM(ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3090 }
3091 CYCLES(1); // TODO: correct cycle count
3092 }
3093
sse_movupd_r128_rm128()3094 void i386_device::sse_movupd_r128_rm128() // Opcode 66 0f 10
3095 {
3096 uint8_t modrm = FETCH();
3097 if( modrm >= 0xc0 ) {
3098 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
3099 } else {
3100 uint32_t ea = GetEA(modrm, 0);
3101 READXMM(ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3102 }
3103 CYCLES(1); // TODO: correct cycle count
3104 }
3105
sse_movups_rm128_r128()3106 void i386_device::sse_movups_rm128_r128() // Opcode 0f 11
3107 {
3108 uint8_t modrm = FETCH();
3109 if( modrm >= 0xc0 ) {
3110 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
3111 } else {
3112 uint32_t ea = GetEA(modrm, 0);
3113 WRITEXMM(ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3114 }
3115 CYCLES(1); // TODO: correct cycle count
3116 }
3117
sse_movupd_rm128_r128()3118 void i386_device::sse_movupd_rm128_r128() // Opcode 66 0f 11
3119 {
3120 uint8_t modrm = FETCH();
3121 if( modrm >= 0xc0 ) {
3122 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
3123 } else {
3124 uint32_t ea = GetEA(modrm, 0);
3125 WRITEXMM(ea, XMM((modrm >> 3) & 0x7)); // address does not need to be 16-byte aligned
3126 }
3127 CYCLES(1); // TODO: correct cycle count
3128 }
3129
sse_movlps_r128_m64()3130 void i386_device::sse_movlps_r128_m64() // Opcode 0f 12
3131 {
3132 uint8_t modrm = FETCH();
3133 if( modrm >= 0xc0 ) {
3134 // MOVHLPS opcode
3135 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[1];
3136 CYCLES(1); // TODO: correct cycle count
3137 } else {
3138 // MOVLPS opcode
3139 uint32_t ea = GetEA(modrm, 0);
3140 READXMM_LO64(ea, XMM((modrm >> 3) & 0x7));
3141 CYCLES(1); // TODO: correct cycle count
3142 }
3143 }
3144
sse_movlpd_r128_m64()3145 void i386_device::sse_movlpd_r128_m64() // Opcode 66 0f 12
3146 {
3147 uint8_t modrm = FETCH();
3148 if( modrm >= 0xc0 ) {
3149 CYCLES(1); // TODO: correct cycle count
3150 } else {
3151 // MOVLPS opcode
3152 uint32_t ea = GetEA(modrm, 0);
3153 READXMM_LO64(ea, XMM((modrm >> 3) & 0x7));
3154 CYCLES(1); // TODO: correct cycle count
3155 }
3156 }
3157
sse_movlps_m64_r128()3158 void i386_device::sse_movlps_m64_r128() // Opcode 0f 13
3159 {
3160 uint8_t modrm = FETCH();
3161 if( modrm >= 0xc0 ) {
3162 // unsupported by cpu
3163 CYCLES(1); // TODO: correct cycle count
3164 } else {
3165 uint32_t ea = GetEA(modrm, 0);
3166 WRITEXMM_LO64(ea, XMM((modrm >> 3) & 0x7));
3167 CYCLES(1); // TODO: correct cycle count
3168 }
3169 }
3170
sse_movlpd_m64_r128()3171 void i386_device::sse_movlpd_m64_r128() // Opcode 66 0f 13
3172 {
3173 uint8_t modrm = FETCH();
3174 if( modrm >= 0xc0 ) {
3175 // unsupported by cpu
3176 CYCLES(1); // TODO: correct cycle count
3177 } else {
3178 uint32_t ea = GetEA(modrm, 0);
3179 WRITEXMM_LO64(ea, XMM((modrm >> 3) & 0x7));
3180 CYCLES(1); // TODO: correct cycle count
3181 }
3182 }
3183
sse_movhps_r128_m64()3184 void i386_device::sse_movhps_r128_m64() // Opcode 0f 16
3185 {
3186 uint8_t modrm = FETCH();
3187 if( modrm >= 0xc0 ) {
3188 // MOVLHPS opcode
3189 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[0];
3190 CYCLES(1); // TODO: correct cycle count
3191 } else {
3192 // MOVHPS opcode
3193 uint32_t ea = GetEA(modrm, 0);
3194 READXMM_HI64(ea, XMM((modrm >> 3) & 0x7));
3195 CYCLES(1); // TODO: correct cycle count
3196 }
3197 }
3198
sse_movhpd_r128_m64()3199 void i386_device::sse_movhpd_r128_m64() // Opcode 66 0f 16
3200 {
3201 uint8_t modrm = FETCH();
3202 if( modrm >= 0xc0 ) {
3203 // unsupported by cpu
3204 CYCLES(1); // TODO: correct cycle count
3205 } else {
3206 // MOVHPS opcode
3207 uint32_t ea = GetEA(modrm, 0);
3208 READXMM_HI64(ea, XMM((modrm >> 3) & 0x7));
3209 CYCLES(1); // TODO: correct cycle count
3210 }
3211 }
3212
sse_movhps_m64_r128()3213 void i386_device::sse_movhps_m64_r128() // Opcode 0f 17
3214 {
3215 uint8_t modrm = FETCH();
3216 if( modrm >= 0xc0 ) {
3217 // unsupported by cpu
3218 CYCLES(1); // TODO: correct cycle count
3219 } else {
3220 uint32_t ea = GetEA(modrm, 0);
3221 WRITEXMM_HI64(ea, XMM((modrm >> 3) & 0x7));
3222 CYCLES(1); // TODO: correct cycle count
3223 }
3224 }
3225
sse_movhpd_m64_r128()3226 void i386_device::sse_movhpd_m64_r128() // Opcode 66 0f 17
3227 {
3228 uint8_t modrm = FETCH();
3229 if( modrm >= 0xc0 ) {
3230 // unsupported by cpu
3231 CYCLES(1); // TODO: correct cycle count
3232 } else {
3233 uint32_t ea = GetEA(modrm, 0);
3234 WRITEXMM_HI64(ea, XMM((modrm >> 3) & 0x7));
3235 CYCLES(1); // TODO: correct cycle count
3236 }
3237 }
3238
sse_movntps_m128_r128()3239 void i386_device::sse_movntps_m128_r128() // Opcode 0f 2b
3240 {
3241 uint8_t modrm = FETCH();
3242 if( modrm >= 0xc0 ) {
3243 // unsupported by cpu
3244 CYCLES(1); // TODO: correct cycle count
3245 } else {
3246 // TODO: manage the cache if present
3247 uint32_t ea = GetEA(modrm, 0);
3248 WRITEXMM(ea, XMM((modrm >> 3) & 0x7));
3249 CYCLES(1); // TODO: correct cycle count
3250 }
3251 }
3252
sse_movmskps_r16_r128()3253 void i386_device::sse_movmskps_r16_r128() // Opcode 0f 50
3254 {
3255 uint8_t modrm = FETCH();
3256 if( modrm >= 0xc0 ) {
3257 int b;
3258 b=(XMM(modrm & 0x7).d[0] >> 31) & 1;
3259 b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2);
3260 b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4);
3261 b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8);
3262 STORE_REG16(modrm, b);
3263 }
3264 CYCLES(1); // TODO: correct cycle count
3265 }
3266
sse_movmskps_r32_r128()3267 void i386_device::sse_movmskps_r32_r128() // Opcode 0f 50
3268 {
3269 uint8_t modrm = FETCH();
3270 if( modrm >= 0xc0 ) {
3271 int b;
3272 b=(XMM(modrm & 0x7).d[0] >> 31) & 1;
3273 b=b | ((XMM(modrm & 0x7).d[1] >> 30) & 2);
3274 b=b | ((XMM(modrm & 0x7).d[2] >> 29) & 4);
3275 b=b | ((XMM(modrm & 0x7).d[3] >> 28) & 8);
3276 STORE_REG32(modrm, b);
3277 }
3278 CYCLES(1); // TODO: correct cycle count
3279 }
3280
sse_movmskpd_r32_r128()3281 void i386_device::sse_movmskpd_r32_r128() // Opcode 66 0f 50
3282 {
3283 uint8_t modrm = FETCH();
3284 if( modrm >= 0xc0 ) {
3285 int b;
3286 b=(XMM(modrm & 0x7).q[0] >> 63) & 1;
3287 b=b | ((XMM(modrm & 0x7).q[1] >> 62) & 2);
3288 STORE_REG32(modrm, b);
3289 }
3290 CYCLES(1); // TODO: correct cycle count
3291 }
3292
sse_movq2dq_r128_r64()3293 void i386_device::sse_movq2dq_r128_r64() // Opcode f3 0f d6
3294 {
3295 MMXPROLOG();
3296 uint8_t modrm = FETCH();
3297 if( modrm >= 0xc0 ) {
3298 XMM((modrm >> 3) & 0x7).q[0] = MMX(modrm & 7).q;
3299 XMM((modrm >> 3) & 0x7).q[1] = 0;
3300 }
3301 CYCLES(1); // TODO: correct cycle count
3302 }
3303
sse_movdqu_r128_rm128()3304 void i386_device::sse_movdqu_r128_rm128() // Opcode f3 0f 6f
3305 {
3306 MMXPROLOG();
3307 uint8_t modrm = FETCH();
3308 if( modrm >= 0xc0 ) {
3309 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3310 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[1];
3311 } else {
3312 uint32_t ea = GetEA(modrm, 0);
3313 READXMM(ea, XMM((modrm >> 3) & 0x7));
3314 }
3315 CYCLES(1); // TODO: correct cycle count
3316 }
3317
sse_movdqu_rm128_r128()3318 void i386_device::sse_movdqu_rm128_r128() // Opcode f3 0f 7f
3319 {
3320 MMXPROLOG();
3321 uint8_t modrm = FETCH();
3322 if( modrm >= 0xc0 ) {
3323 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
3324 XMM(modrm & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1];
3325 } else {
3326 uint32_t ea = GetEA(modrm, 0);
3327 WRITEXMM(ea, XMM((modrm >> 3) & 0x7));
3328 }
3329 CYCLES(1); // TODO: correct cycle count
3330 }
3331
sse_movd_m128_rm32()3332 void i386_device::sse_movd_m128_rm32() // Opcode 66 0f 6e
3333 {
3334 uint8_t modrm = FETCH();
3335 if (modrm >= 0xc0) {
3336 XMM((modrm >> 3) & 0x7).d[0] = LOAD_RM32(modrm);
3337 }
3338 else {
3339 uint32_t ea = GetEA(modrm, 0);
3340 XMM((modrm >> 3) & 0x7).d[0] = READ32(ea);
3341 }
3342 XMM((modrm >> 3) & 0x7).d[1] = 0;
3343 XMM((modrm >> 3) & 0x7).q[1] = 0;
3344 CYCLES(1); // TODO: correct cycle count
3345 }
3346
sse_movdqa_m128_rm128()3347 void i386_device::sse_movdqa_m128_rm128() // Opcode 66 0f 6f
3348 {
3349 uint8_t modrm = FETCH();
3350 if (modrm >= 0xc0) {
3351 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3352 XMM((modrm >> 3) & 0x7).q[1] = XMM(modrm & 0x7).q[1];
3353 }
3354 else {
3355 uint32_t ea = GetEA(modrm, 0);
3356 READXMM(ea, XMM((modrm >> 3) & 0x7));
3357 }
3358 CYCLES(1); // TODO: correct cycle count
3359 }
3360
sse_movq_r128_r128m64()3361 void i386_device::sse_movq_r128_r128m64() // Opcode f3 0f 7e
3362 {
3363 MMXPROLOG();
3364 uint8_t modrm = FETCH();
3365 if( modrm >= 0xc0 ) {
3366 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
3367 XMM((modrm >> 3) & 0x7).q[1] = 0;
3368 } else {
3369 uint32_t ea = GetEA(modrm, 0);
3370 XMM((modrm >> 3) & 0x7).q[0] = READ64(ea);
3371 XMM((modrm >> 3) & 0x7).q[1] = 0;
3372 }
3373 CYCLES(1); // TODO: correct cycle count
3374 }
3375
sse_movd_rm32_r128()3376 void i386_device::sse_movd_rm32_r128() // Opcode 66 0f 7e
3377 {
3378 uint8_t modrm = FETCH();
3379 if (modrm >= 0xc0) {
3380 STORE_RM32(modrm, XMM((modrm >> 3) & 0x7).d[0]);
3381 }
3382 else {
3383 uint32_t ea = GetEA(modrm, 0);
3384 WRITE32(ea, XMM((modrm >> 3) & 0x7).d[0]);
3385 }
3386 CYCLES(1); // TODO: correct cycle count
3387 }
3388
sse_movdqa_rm128_r128()3389 void i386_device::sse_movdqa_rm128_r128() // Opcode 66 0f 7f
3390 {
3391 uint8_t modrm = FETCH();
3392 if (modrm >= 0xc0) {
3393 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
3394 XMM(modrm & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1];
3395 }
3396 else {
3397 uint32_t ea = GetEA(modrm, 0);
3398 WRITEXMM(ea, XMM((modrm >> 3) & 0x7));
3399 }
3400 CYCLES(1); // TODO: correct cycle count
3401 }
3402
sse_pmovmskb_r16_r64()3403 void i386_device::sse_pmovmskb_r16_r64() // Opcode 0f d7
3404 {
3405 //MMXPROLOG();
3406 uint8_t modrm = FETCH();
3407 if( modrm >= 0xc0 ) {
3408 int b;
3409 b=(MMX(modrm & 0x7).b[0] >> 7) & 1;
3410 b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2);
3411 b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4);
3412 b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8);
3413 b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16);
3414 b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32);
3415 b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64);
3416 b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128);
3417 STORE_REG16(modrm, b);
3418 }
3419 CYCLES(1); // TODO: correct cycle count
3420 }
3421
sse_pmovmskb_r32_r64()3422 void i386_device::sse_pmovmskb_r32_r64() // Opcode 0f d7
3423 {
3424 //MMXPROLOG();
3425 uint8_t modrm = FETCH();
3426 if( modrm >= 0xc0 ) {
3427 int b;
3428 b=(MMX(modrm & 0x7).b[0] >> 7) & 1;
3429 b=b | ((MMX(modrm & 0x7).b[1] >> 6) & 2);
3430 b=b | ((MMX(modrm & 0x7).b[2] >> 5) & 4);
3431 b=b | ((MMX(modrm & 0x7).b[3] >> 4) & 8);
3432 b=b | ((MMX(modrm & 0x7).b[4] >> 3) & 16);
3433 b=b | ((MMX(modrm & 0x7).b[5] >> 2) & 32);
3434 b=b | ((MMX(modrm & 0x7).b[6] >> 1) & 64);
3435 b=b | ((MMX(modrm & 0x7).b[7] >> 0) & 128);
3436 STORE_REG32(modrm, b);
3437 }
3438 CYCLES(1); // TODO: correct cycle count
3439 }
3440
sse_pmovmskb_r32_r128()3441 void i386_device::sse_pmovmskb_r32_r128() // Opcode 66 0f d7
3442 {
3443 uint8_t modrm = FETCH();
3444 if( modrm >= 0xc0 ) {
3445 uint32_t b;
3446 b=(XMM(modrm & 0x7).b[0] >> 7) & 1;
3447 b=b | ((XMM(modrm & 0x7).b[1] >> 6) & 2);
3448 b=b | ((XMM(modrm & 0x7).b[2] >> 5) & 4);
3449 b=b | ((XMM(modrm & 0x7).b[3] >> 4) & 8);
3450 b=b | ((XMM(modrm & 0x7).b[4] >> 3) & 16);
3451 b=b | ((XMM(modrm & 0x7).b[5] >> 2) & 32);
3452 b=b | ((XMM(modrm & 0x7).b[6] >> 1) & 64);
3453 b=b | ((XMM(modrm & 0x7).b[7] >> 0) & 128);
3454 b=b | ((XMM(modrm & 0x7).b[8] << 1) & 256);
3455 b=b | ((XMM(modrm & 0x7).b[9] << 2) & 512);
3456 b=b | ((XMM(modrm & 0x7).b[10] << 3) & 1024);
3457 b=b | ((XMM(modrm & 0x7).b[11] << 4) & 2048);
3458 b=b | ((XMM(modrm & 0x7).b[12] << 5) & 4096);
3459 b=b | ((XMM(modrm & 0x7).b[13] << 6) & 8192);
3460 b=b | ((XMM(modrm & 0x7).b[14] << 7) & 16384);
3461 b=b | ((XMM(modrm & 0x7).b[15] << 8) & 32768);
3462 STORE_REG32(modrm, b);
3463 }
3464 CYCLES(1); // TODO: correct cycle count
3465 }
3466
sse_xorps()3467 void i386_device::sse_xorps() // Opcode 0f 57
3468 {
3469 uint8_t modrm = FETCH();
3470 if( modrm >= 0xc0 ) {
3471 XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ XMM(modrm & 0x7).d[0];
3472 XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ XMM(modrm & 0x7).d[1];
3473 XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ XMM(modrm & 0x7).d[2];
3474 XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ XMM(modrm & 0x7).d[3];
3475 } else {
3476 XMM_REG src;
3477 uint32_t ea = GetEA(modrm, 0);
3478 READXMM(ea, src);
3479 XMM((modrm >> 3) & 0x7).d[0] = XMM((modrm >> 3) & 0x7).d[0] ^ src.d[0];
3480 XMM((modrm >> 3) & 0x7).d[1] = XMM((modrm >> 3) & 0x7).d[1] ^ src.d[1];
3481 XMM((modrm >> 3) & 0x7).d[2] = XMM((modrm >> 3) & 0x7).d[2] ^ src.d[2];
3482 XMM((modrm >> 3) & 0x7).d[3] = XMM((modrm >> 3) & 0x7).d[3] ^ src.d[3];
3483 }
3484 CYCLES(1); // TODO: correct cycle count
3485 }
3486
sse_xorpd_r128_rm128()3487 void i386_device::sse_xorpd_r128_rm128() // Opcode 66 0f 57
3488 {
3489 uint8_t modrm = FETCH();
3490 if( modrm >= 0xc0 ) {
3491 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] ^ XMM(modrm & 0x7).q[0];
3492 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] ^ XMM(modrm & 0x7).q[1];
3493 } else {
3494 XMM_REG src;
3495 uint32_t ea = GetEA(modrm, 0);
3496 READXMM(ea, src);
3497 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] ^ src.q[0];
3498 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] ^ src.q[1];
3499 }
3500 CYCLES(1); // TODO: correct cycle count
3501 }
3502
sse_addps()3503 void i386_device::sse_addps() // Opcode 0f 58
3504 {
3505 uint8_t modrm = FETCH();
3506 if( modrm >= 0xc0 ) {
3507 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0];
3508 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] + XMM(modrm & 0x7).f[1];
3509 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + XMM(modrm & 0x7).f[2];
3510 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + XMM(modrm & 0x7).f[3];
3511 } else {
3512 XMM_REG src;
3513 uint32_t ea = GetEA(modrm, 0);
3514 READXMM(ea, src);
3515 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0];
3516 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] + src.f[1];
3517 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] + src.f[2];
3518 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] + src.f[3];
3519 }
3520 CYCLES(1); // TODO: correct cycle count
3521 }
3522
sse_sqrtps_r128_rm128()3523 void i386_device::sse_sqrtps_r128_rm128() // Opcode 0f 51
3524 {
3525 uint8_t modrm = FETCH();
3526 if( modrm >= 0xc0 ) {
3527 XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]);
3528 XMM((modrm >> 3) & 0x7).f[1] = sqrt(XMM(modrm & 0x7).f[1]);
3529 XMM((modrm >> 3) & 0x7).f[2] = sqrt(XMM(modrm & 0x7).f[2]);
3530 XMM((modrm >> 3) & 0x7).f[3] = sqrt(XMM(modrm & 0x7).f[3]);
3531 } else {
3532 XMM_REG src;
3533 uint32_t ea = GetEA(modrm, 0);
3534 READXMM(ea, src);
3535 XMM((modrm >> 3) & 0x7).f[0] = sqrt(src.f[0]);
3536 XMM((modrm >> 3) & 0x7).f[1] = sqrt(src.f[1]);
3537 XMM((modrm >> 3) & 0x7).f[2] = sqrt(src.f[2]);
3538 XMM((modrm >> 3) & 0x7).f[3] = sqrt(src.f[3]);
3539 }
3540 CYCLES(1); // TODO: correct cycle count
3541 }
3542
sse_rsqrtps_r128_rm128()3543 void i386_device::sse_rsqrtps_r128_rm128() // Opcode 0f 52
3544 {
3545 uint8_t modrm = FETCH();
3546 if( modrm >= 0xc0 ) {
3547 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]);
3548 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(XMM(modrm & 0x7).f[1]);
3549 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(XMM(modrm & 0x7).f[2]);
3550 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(XMM(modrm & 0x7).f[3]);
3551 } else {
3552 XMM_REG src;
3553 uint32_t ea = GetEA(modrm, 0);
3554 READXMM(ea, src);
3555 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(src.f[0]);
3556 XMM((modrm >> 3) & 0x7).f[1] = 1.0 / sqrt(src.f[1]);
3557 XMM((modrm >> 3) & 0x7).f[2] = 1.0 / sqrt(src.f[2]);
3558 XMM((modrm >> 3) & 0x7).f[3] = 1.0 / sqrt(src.f[3]);
3559 }
3560 CYCLES(1); // TODO: correct cycle count
3561 }
3562
sse_rcpps_r128_rm128()3563 void i386_device::sse_rcpps_r128_rm128() // Opcode 0f 53
3564 {
3565 uint8_t modrm = FETCH();
3566 if( modrm >= 0xc0 ) {
3567 XMM((modrm >> 3) & 0x7).f[0] = 1.0f / XMM(modrm & 0x7).f[0];
3568 XMM((modrm >> 3) & 0x7).f[1] = 1.0f / XMM(modrm & 0x7).f[1];
3569 XMM((modrm >> 3) & 0x7).f[2] = 1.0f / XMM(modrm & 0x7).f[2];
3570 XMM((modrm >> 3) & 0x7).f[3] = 1.0f / XMM(modrm & 0x7).f[3];
3571 } else {
3572 XMM_REG src;
3573 uint32_t ea = GetEA(modrm, 0);
3574 READXMM(ea, src);
3575 XMM((modrm >> 3) & 0x7).f[0] = 1.0f / src.f[0];
3576 XMM((modrm >> 3) & 0x7).f[1] = 1.0f / src.f[1];
3577 XMM((modrm >> 3) & 0x7).f[2] = 1.0f / src.f[2];
3578 XMM((modrm >> 3) & 0x7).f[3] = 1.0f / src.f[3];
3579 }
3580 CYCLES(1); // TODO: correct cycle count
3581 }
3582
sse_andps_r128_rm128()3583 void i386_device::sse_andps_r128_rm128() // Opcode 0f 54
3584 {
3585 uint8_t modrm = FETCH();
3586 if( modrm >= 0xc0 ) {
3587 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 0x7).q[0];
3588 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 0x7).q[1];
3589 } else {
3590 XMM_REG src;
3591 uint32_t ea = GetEA(modrm, 0);
3592 READXMM(ea, src);
3593 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
3594 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
3595 }
3596 CYCLES(1); // TODO: correct cycle count
3597 }
3598
sse_andpd_r128_rm128()3599 void i386_device::sse_andpd_r128_rm128() // Opcode 66 0f 54
3600 {
3601 uint8_t modrm = FETCH();
3602 if( modrm >= 0xc0 ) {
3603 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 0x7).q[0];
3604 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 0x7).q[1];
3605 } else {
3606 XMM_REG src;
3607 uint32_t ea = GetEA(modrm, 0);
3608 READXMM(ea, src);
3609 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
3610 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
3611 }
3612 CYCLES(1); // TODO: correct cycle count
3613 }
3614
sse_andnps_r128_rm128()3615 void i386_device::sse_andnps_r128_rm128() // Opcode 0f 55
3616 {
3617 uint8_t modrm = FETCH();
3618 if( modrm >= 0xc0 ) {
3619 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 0x7).q[0];
3620 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 0x7).q[1];
3621 } else {
3622 XMM_REG src;
3623 uint32_t ea = GetEA(modrm, 0);
3624 READXMM(ea, src);
3625 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
3626 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
3627 }
3628 CYCLES(1); // TODO: correct cycle count
3629 }
3630
sse_andnpd_r128_rm128()3631 void i386_device::sse_andnpd_r128_rm128() // Opcode 66 0f 55
3632 {
3633 uint8_t modrm = FETCH();
3634 if( modrm >= 0xc0 ) {
3635 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 0x7).q[0];
3636 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 0x7).q[1];
3637 } else {
3638 XMM_REG src;
3639 uint32_t ea = GetEA(modrm, 0);
3640 READXMM(ea, src);
3641 XMM((modrm >> 3) & 0x7).q[0] = ~(XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
3642 XMM((modrm >> 3) & 0x7).q[1] = ~(XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
3643 }
3644 CYCLES(1); // TODO: correct cycle count
3645 }
3646
sse_orps_r128_rm128()3647 void i386_device::sse_orps_r128_rm128() // Opcode 0f 56
3648 {
3649 uint8_t modrm = FETCH();
3650 if( modrm >= 0xc0 ) {
3651 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 0x7).q[0];
3652 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 0x7).q[1];
3653 } else {
3654 XMM_REG src;
3655 uint32_t ea = GetEA(modrm, 0);
3656 READXMM(ea, src);
3657 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | src.q[0];
3658 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | src.q[1];
3659 }
3660 CYCLES(1); // TODO: correct cycle count
3661 }
3662
sse_orpd_r128_rm128()3663 void i386_device::sse_orpd_r128_rm128() // Opcode 66 0f 56
3664 {
3665 uint8_t modrm = FETCH();
3666 if( modrm >= 0xc0 ) {
3667 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 0x7).q[0];
3668 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 0x7).q[1];
3669 } else {
3670 XMM_REG src;
3671 uint32_t ea = GetEA(modrm, 0);
3672 READXMM(ea, src);
3673 XMM((modrm >> 3) & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0] | src.q[0];
3674 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[1] | src.q[1];
3675 }
3676 CYCLES(1); // TODO: correct cycle count
3677 }
3678
sse_mulps()3679 void i386_device::sse_mulps() // Opcode 0f 59 ????
3680 {
3681 uint8_t modrm = FETCH();
3682 if( modrm >= 0xc0 ) {
3683 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0];
3684 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] * XMM(modrm & 0x7).f[1];
3685 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * XMM(modrm & 0x7).f[2];
3686 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * XMM(modrm & 0x7).f[3];
3687 } else {
3688 XMM_REG src;
3689 uint32_t ea = GetEA(modrm, 0);
3690 READXMM(ea, src);
3691 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0];
3692 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] * src.f[1];
3693 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] * src.f[2];
3694 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] * src.f[3];
3695 }
3696 CYCLES(1); // TODO: correct cycle count
3697 }
3698
sse_subps()3699 void i386_device::sse_subps() // Opcode 0f 5c
3700 {
3701 uint8_t modrm = FETCH();
3702 if( modrm >= 0xc0 ) {
3703 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
3704 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - XMM(modrm & 0x7).f[1];
3705 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - XMM(modrm & 0x7).f[2];
3706 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - XMM(modrm & 0x7).f[3];
3707 } else {
3708 XMM_REG src;
3709 uint32_t ea = GetEA(modrm, 0);
3710 READXMM(ea, src);
3711 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
3712 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] - src.f[1];
3713 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] - src.f[2];
3714 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] - src.f[3];
3715 }
3716 CYCLES(1); // TODO: correct cycle count
3717 }
3718
// Minimum of two single-precision values as used by the MINPS/MINSS handlers.
// Returns src1 only when src1 compares strictly less than src2; in every
// other case (including when either operand is a NaN, or when both are
// zeros of either sign) src2 is returned unchanged.
static inline float sse_min_single(float src1, float src2)
{
	return (src1 < src2) ? src1 : src2;
}
3731
// Minimum of two double-precision values.
// Returns src1 only when src1 compares strictly less than src2; in every
// other case (including when either operand is a NaN, or when both are
// zeros of either sign) src2 is returned unchanged.
static inline double sse_min_double(double src1, double src2)
{
	return (src1 < src2) ? src1 : src2;
}
3744
sse_minps()3745 void i386_device::sse_minps() // Opcode 0f 5d
3746 {
3747 uint8_t modrm = FETCH();
3748 if( modrm >= 0xc0 ) {
3749 XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3750 XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]);
3751 XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]);
3752 XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]);
3753 } else {
3754 XMM_REG src;
3755 uint32_t ea = GetEA(modrm, 0);
3756 READXMM(ea, src);
3757 XMM((modrm >> 3) & 0x7).f[0] = sse_min_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3758 XMM((modrm >> 3) & 0x7).f[1] = sse_min_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]);
3759 XMM((modrm >> 3) & 0x7).f[2] = sse_min_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]);
3760 XMM((modrm >> 3) & 0x7).f[3] = sse_min_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]);
3761 }
3762 CYCLES(1); // TODO: correct cycle count
3763 }
3764
sse_divps()3765 void i386_device::sse_divps() // Opcode 0f 5e
3766 {
3767 uint8_t modrm = FETCH();
3768 if( modrm >= 0xc0 ) {
3769 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0];
3770 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / XMM(modrm & 0x7).f[1];
3771 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / XMM(modrm & 0x7).f[2];
3772 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / XMM(modrm & 0x7).f[3];
3773 } else {
3774 XMM_REG src;
3775 uint32_t ea = GetEA(modrm, 0);
3776 READXMM(ea, src);
3777 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0];
3778 XMM((modrm >> 3) & 0x7).f[1] = XMM((modrm >> 3) & 0x7).f[1] / src.f[1];
3779 XMM((modrm >> 3) & 0x7).f[2] = XMM((modrm >> 3) & 0x7).f[2] / src.f[2];
3780 XMM((modrm >> 3) & 0x7).f[3] = XMM((modrm >> 3) & 0x7).f[3] / src.f[3];
3781 }
3782 CYCLES(1); // TODO: correct cycle count
3783 }
3784
// Maximum of two single-precision values as used by the MAXPS/MAXSS handlers.
// Returns src1 only when src1 compares strictly greater than src2; in every
// other case (including when either operand is a NaN, or when both are
// zeros of either sign) src2 is returned unchanged.
static inline float sse_max_single(float src1, float src2)
{
	return (src1 > src2) ? src1 : src2;
}
3797
// Maximum of two double-precision values.
// Returns src1 only when src1 compares strictly greater than src2; in every
// other case (including when either operand is a NaN, or when both are
// zeros of either sign) src2 is returned unchanged.
static inline double sse_max_double(double src1, double src2)
{
	return (src1 > src2) ? src1 : src2;
}
3810
sse_maxps()3811 void i386_device::sse_maxps() // Opcode 0f 5f
3812 {
3813 uint8_t modrm = FETCH();
3814 if( modrm >= 0xc0 ) {
3815 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3816 XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], XMM(modrm & 0x7).f[1]);
3817 XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], XMM(modrm & 0x7).f[2]);
3818 XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], XMM(modrm & 0x7).f[3]);
3819 } else {
3820 XMM_REG src;
3821 uint32_t ea = GetEA(modrm, 0);
3822 READXMM(ea, src);
3823 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3824 XMM((modrm >> 3) & 0x7).f[1] = sse_max_single(XMM((modrm >> 3) & 0x7).f[1], src.f[1]);
3825 XMM((modrm >> 3) & 0x7).f[2] = sse_max_single(XMM((modrm >> 3) & 0x7).f[2], src.f[2]);
3826 XMM((modrm >> 3) & 0x7).f[3] = sse_max_single(XMM((modrm >> 3) & 0x7).f[3], src.f[3]);
3827 }
3828 CYCLES(1); // TODO: correct cycle count
3829 }
3830
sse_maxss_r128_r128m32()3831 void i386_device::sse_maxss_r128_r128m32() // Opcode f3 0f 5f
3832 {
3833 uint8_t modrm = FETCH();
3834 if( modrm >= 0xc0 ) {
3835 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], XMM(modrm & 0x7).f[0]);
3836 } else {
3837 XMM_REG src;
3838 uint32_t ea = GetEA(modrm, 0);
3839 src.d[0]=READ32(ea);
3840 XMM((modrm >> 3) & 0x7).f[0] = sse_max_single(XMM((modrm >> 3) & 0x7).f[0], src.f[0]);
3841 }
3842 CYCLES(1); // TODO: correct cycle count
3843 }
3844
sse_addss()3845 void i386_device::sse_addss() // Opcode f3 0f 58
3846 {
3847 uint8_t modrm = FETCH();
3848 if( modrm >= 0xc0 ) {
3849 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + XMM(modrm & 0x7).f[0];
3850 } else {
3851 XMM_REG src;
3852 uint32_t ea = GetEA(modrm, 0);
3853 READXMM(ea, src);
3854 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] + src.f[0];
3855 }
3856 CYCLES(1); // TODO: correct cycle count
3857 }
3858
sse_subss()3859 void i386_device::sse_subss() // Opcode f3 0f 5c
3860 {
3861 uint8_t modrm = FETCH();
3862 if( modrm >= 0xc0 ) {
3863 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
3864 } else {
3865 XMM_REG src;
3866 uint32_t ea = GetEA(modrm, 0);
3867 READXMM(ea, src);
3868 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
3869 }
3870 CYCLES(1); // TODO: correct cycle count
3871 }
3872
sse_mulss()3873 void i386_device::sse_mulss() // Opcode f3 0f 5e
3874 {
3875 uint8_t modrm = FETCH();
3876 if( modrm >= 0xc0 ) {
3877 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * XMM(modrm & 0x7).f[0];
3878 } else {
3879 XMM_REG src;
3880 uint32_t ea = GetEA(modrm, 0);
3881 READXMM(ea, src);
3882 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] * src.f[0];
3883 }
3884 CYCLES(1); // TODO: correct cycle count
3885 }
3886
sse_divss()3887 void i386_device::sse_divss() // Opcode 0f 59
3888 {
3889 uint8_t modrm = FETCH();
3890 if( modrm >= 0xc0 ) {
3891 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / XMM(modrm & 0x7).f[0];
3892 } else {
3893 XMM_REG src;
3894 uint32_t ea = GetEA(modrm, 0);
3895 READXMM(ea, src);
3896 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] / src.f[0];
3897 }
3898 CYCLES(1); // TODO: correct cycle count
3899 }
3900
sse_rcpss_r128_r128m32()3901 void i386_device::sse_rcpss_r128_r128m32() // Opcode f3 0f 53
3902 {
3903 uint8_t modrm = FETCH();
3904 if( modrm >= 0xc0 ) {
3905 XMM((modrm >> 3) & 0x7).f[0] = 1.0f / XMM(modrm & 0x7).f[0];
3906 } else {
3907 XMM_REG s;
3908 uint32_t ea = GetEA(modrm, 0);
3909 s.d[0]=READ32(ea);
3910 XMM((modrm >> 3) & 0x7).f[0] = 1.0f / s.f[0];
3911 }
3912 CYCLES(1); // TODO: correct cycle count
3913 }
3914
sse_sqrtss_r128_r128m32()3915 void i386_device::sse_sqrtss_r128_r128m32() // Opcode f3 0f 51
3916 {
3917 uint8_t modrm = FETCH();
3918 if( modrm >= 0xc0 ) {
3919 XMM((modrm >> 3) & 0x7).f[0] = sqrt(XMM(modrm & 0x7).f[0]);
3920 } else {
3921 XMM_REG s;
3922 uint32_t ea = GetEA(modrm, 0);
3923 s.d[0]=READ32(ea);
3924 XMM((modrm >> 3) & 0x7).f[0] = sqrt(s.f[0]);
3925 }
3926 CYCLES(1); // TODO: correct cycle count
3927 }
3928
sse_rsqrtss_r128_r128m32()3929 void i386_device::sse_rsqrtss_r128_r128m32() // Opcode f3 0f 52
3930 {
3931 uint8_t modrm = FETCH();
3932 if( modrm >= 0xc0 ) {
3933 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(XMM(modrm & 0x7).f[0]);
3934 } else {
3935 XMM_REG s;
3936 uint32_t ea = GetEA(modrm, 0);
3937 s.d[0]=READ32(ea);
3938 XMM((modrm >> 3) & 0x7).f[0] = 1.0 / sqrt(s.f[0]);
3939 }
3940 CYCLES(1); // TODO: correct cycle count
3941 }
3942
sse_minss_r128_r128m32()3943 void i386_device::sse_minss_r128_r128m32() // Opcode f3 0f 5d
3944 {
3945 uint8_t modrm = FETCH();
3946 if( modrm >= 0xc0 ) {
3947 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < XMM(modrm & 0x7).f[0] ? XMM((modrm >> 3) & 0x7).f[0] : XMM(modrm & 0x7).f[0];
3948 } else {
3949 XMM_REG s;
3950 uint32_t ea = GetEA(modrm, 0);
3951 s.d[0] = READ32(ea);
3952 XMM((modrm >> 3) & 0x7).f[0] = XMM((modrm >> 3) & 0x7).f[0] < s.f[0] ? XMM((modrm >> 3) & 0x7).f[0] : s.f[0];
3953 }
3954 CYCLES(1); // TODO: correct cycle count
3955 }
3956
sse_comiss_r128_r128m32()3957 void i386_device::sse_comiss_r128_r128m32() // Opcode 0f 2f
3958 {
3959 float32 a,b;
3960 uint8_t modrm = FETCH();
3961 if( modrm >= 0xc0 ) {
3962 a = XMM((modrm >> 3) & 0x7).d[0];
3963 b = XMM(modrm & 0x7).d[0];
3964 } else {
3965 XMM_REG src;
3966 uint32_t ea = GetEA(modrm, 0);
3967 READXMM(ea, src);
3968 a = XMM((modrm >> 3) & 0x7).d[0];
3969 b = src.d[0];
3970 }
3971 m_OF=0;
3972 m_SF=0;
3973 m_AF=0;
3974 if (float32_is_nan(a) || float32_is_nan(b))
3975 {
3976 m_ZF = 1;
3977 m_PF = 1;
3978 m_CF = 1;
3979 }
3980 else
3981 {
3982 m_ZF = 0;
3983 m_PF = 0;
3984 m_CF = 0;
3985 if (float32_eq(a, b))
3986 m_ZF = 1;
3987 if (float32_lt(a, b))
3988 m_CF = 1;
3989 }
3990 // should generate exception when at least one of the operands is either QNaN or SNaN
3991 CYCLES(1); // TODO: correct cycle count
3992 }
3993
sse_comisd_r128_r128m64()3994 void i386_device::sse_comisd_r128_r128m64() // Opcode 66 0f 2f
3995 {
3996 float64 a,b;
3997 uint8_t modrm = FETCH();
3998 if( modrm >= 0xc0 ) {
3999 a = XMM((modrm >> 3) & 0x7).q[0];
4000 b = XMM(modrm & 0x7).q[0];
4001 } else {
4002 XMM_REG src;
4003 uint32_t ea = GetEA(modrm, 0);
4004 READXMM(ea, src);
4005 a = XMM((modrm >> 3) & 0x7).q[0];
4006 b = src.q[0];
4007 }
4008 m_OF=0;
4009 m_SF=0;
4010 m_AF=0;
4011 if (float64_is_nan(a) || float64_is_nan(b))
4012 {
4013 m_ZF = 1;
4014 m_PF = 1;
4015 m_CF = 1;
4016 }
4017 else
4018 {
4019 m_ZF = 0;
4020 m_PF = 0;
4021 m_CF = 0;
4022 if (float64_eq(a, b))
4023 m_ZF = 1;
4024 if (float64_lt(a, b))
4025 m_CF = 1;
4026 }
4027 // should generate exception when at least one of the operands is either QNaN or SNaN
4028 CYCLES(1); // TODO: correct cycle count
4029 }
4030
sse_ucomiss_r128_r128m32()4031 void i386_device::sse_ucomiss_r128_r128m32() // Opcode 0f 2e
4032 {
4033 float32 a,b;
4034 uint8_t modrm = FETCH();
4035 if( modrm >= 0xc0 ) {
4036 a = XMM((modrm >> 3) & 0x7).d[0];
4037 b = XMM(modrm & 0x7).d[0];
4038 } else {
4039 XMM_REG src;
4040 uint32_t ea = GetEA(modrm, 0);
4041 READXMM(ea, src);
4042 a = XMM((modrm >> 3) & 0x7).d[0];
4043 b = src.d[0];
4044 }
4045 m_OF=0;
4046 m_SF=0;
4047 m_AF=0;
4048 if (float32_is_nan(a) || float32_is_nan(b))
4049 {
4050 m_ZF = 1;
4051 m_PF = 1;
4052 m_CF = 1;
4053 }
4054 else
4055 {
4056 m_ZF = 0;
4057 m_PF = 0;
4058 m_CF = 0;
4059 if (float32_eq(a, b))
4060 m_ZF = 1;
4061 if (float32_lt(a, b))
4062 m_CF = 1;
4063 }
4064 // should generate exception when at least one of the operands is SNaN
4065 CYCLES(1); // TODO: correct cycle count
4066 }
4067
sse_ucomisd_r128_r128m64()4068 void i386_device::sse_ucomisd_r128_r128m64() // Opcode 66 0f 2e
4069 {
4070 float64 a,b;
4071 uint8_t modrm = FETCH();
4072 if( modrm >= 0xc0 ) {
4073 a = XMM((modrm >> 3) & 0x7).q[0];
4074 b = XMM(modrm & 0x7).q[0];
4075 } else {
4076 XMM_REG src;
4077 uint32_t ea = GetEA(modrm, 0);
4078 READXMM(ea, src);
4079 a = XMM((modrm >> 3) & 0x7).q[0];
4080 b = src.q[0];
4081 }
4082 m_OF=0;
4083 m_SF=0;
4084 m_AF=0;
4085 if (float64_is_nan(a) || float64_is_nan(b))
4086 {
4087 m_ZF = 1;
4088 m_PF = 1;
4089 m_CF = 1;
4090 }
4091 else
4092 {
4093 m_ZF = 0;
4094 m_PF = 0;
4095 m_CF = 0;
4096 if (float64_eq(a, b))
4097 m_ZF = 1;
4098 if (float64_lt(a, b))
4099 m_CF = 1;
4100 }
4101 // should generate exception when at least one of the operands is SNaN
4102 CYCLES(1); // TODO: correct cycle count
4103 }
4104
sse_shufps()4105 void i386_device::sse_shufps() // Opcode 0f c6
4106 {
4107 uint8_t modrm = FETCH();
4108 uint8_t sel = FETCH();
4109 int m1,m2,m3,m4;
4110 int s,d;
4111 m1=sel & 3;
4112 m2=(sel >> 2) & 3;
4113 m3=(sel >> 4) & 3;
4114 m4=(sel >> 6) & 3;
4115 s=modrm & 0x7;
4116 d=(modrm >> 3) & 0x7;
4117 if( modrm >= 0xc0 ) {
4118 uint32_t t1,t2,t3,t4;
4119 t1=XMM(d).d[m1];
4120 t2=XMM(d).d[m2];
4121 t3=XMM(s).d[m3];
4122 t4=XMM(s).d[m4];
4123 XMM(d).d[0]=t1;
4124 XMM(d).d[1]=t2;
4125 XMM(d).d[2]=t3;
4126 XMM(d).d[3]=t4;
4127 } else {
4128 uint32_t t1,t2;
4129 XMM_REG src;
4130 uint32_t ea = GetEA(modrm, 0);
4131 READXMM(ea, src);
4132 t1=XMM(d).d[m1];
4133 t2=XMM(d).d[m2];
4134 XMM(d).d[0]=t1;
4135 XMM(d).d[1]=t2;
4136 XMM(d).d[2]=src.d[m3];
4137 XMM(d).d[3]=src.d[m4];
4138 }
4139 CYCLES(1); // TODO: correct cycle count
4140 }
4141
sse_shufpd_r128_rm128_i8()4142 void i386_device::sse_shufpd_r128_rm128_i8() // Opcode 66 0f c6
4143 {
4144 uint8_t modrm = FETCH();
4145 uint8_t sel = FETCH();
4146 int m1,m2;
4147 int s,d;
4148 m1=sel & 1;
4149 m2=(sel >> 1) & 1;
4150 s=modrm & 0x7;
4151 d=(modrm >> 3) & 0x7;
4152 if( modrm >= 0xc0 ) {
4153 uint64_t t1,t2;
4154 t1=XMM(d).q[m1];
4155 t2=XMM(s).q[m2];
4156 XMM(d).q[0]=t1;
4157 XMM(d).q[1]=t2;
4158 } else {
4159 uint64_t t1;
4160 XMM_REG src;
4161 uint32_t ea = GetEA(modrm, 0);
4162 READXMM(ea, src);
4163 t1=XMM(d).q[m1];
4164 XMM(d).q[0]=t1;
4165 XMM(d).q[1]=src.q[m2];
4166 }
4167 CYCLES(1); // TODO: correct cycle count
4168 }
4169
sse_unpcklps_r128_rm128()4170 void i386_device::sse_unpcklps_r128_rm128() // Opcode 0f 14
4171 {
4172 uint8_t modrm = FETCH();
4173 int s,d;
4174 uint32_t t1, t2, t3, t4;
4175 s=modrm & 0x7;
4176 d=(modrm >> 3) & 0x7;
4177 if( modrm >= 0xc0 ) {
4178 t1 = XMM(s).d[1];
4179 t2 = XMM(d).d[1];
4180 t3 = XMM(s).d[0];
4181 t4 = XMM(d).d[0];
4182 XMM(d).d[3]=t1;
4183 XMM(d).d[2]=t2;
4184 XMM(d).d[1]=t3;
4185 XMM(d).d[0]=t4;
4186 } else {
4187 XMM_REG src;
4188 uint32_t ea = GetEA(modrm, 0);
4189 READXMM(ea, src);
4190 t2 = XMM(d).d[1];
4191 XMM(d).d[3]=src.d[1];
4192 XMM(d).d[2]=t2;
4193 XMM(d).d[1]=src.d[0];
4194 }
4195 CYCLES(1); // TODO: correct cycle count
4196 }
4197
sse_unpcklpd_r128_rm128()4198 void i386_device::sse_unpcklpd_r128_rm128() // Opcode 66 0f 14
4199 {
4200 uint8_t modrm = FETCH();
4201 int s,d;
4202 s=modrm & 0x7;
4203 d=(modrm >> 3) & 0x7;
4204 if( modrm >= 0xc0 ) {
4205 XMM(d).q[1]=XMM(s).q[0];
4206 XMM(d).q[0]=XMM(d).q[0];
4207 } else {
4208 XMM_REG src;
4209 uint32_t ea = GetEA(modrm, 0);
4210 READXMM(ea, src);
4211 XMM(d).q[1]=src.q[0];
4212 XMM(d).q[0]=XMM(d).q[0];
4213 }
4214 CYCLES(1); // TODO: correct cycle count
4215 }
4216
sse_unpckhps_r128_rm128()4217 void i386_device::sse_unpckhps_r128_rm128() // Opcode 0f 15
4218 {
4219 uint8_t modrm = FETCH();
4220 int s,d;
4221 uint32_t t1, t2, t3, t4;
4222 s=modrm & 0x7;
4223 d=(modrm >> 3) & 0x7;
4224 if( modrm >= 0xc0 ) {
4225 t1 = XMM(d).d[2];
4226 t2 = XMM(s).d[2];
4227 t3 = XMM(d).d[3];
4228 t4 = XMM(s).d[3];
4229 XMM(d).d[0]=t1;
4230 XMM(d).d[1]=t2;
4231 XMM(d).d[2]=t3;
4232 XMM(d).d[3]=t4;
4233 } else {
4234 XMM_REG src;
4235 uint32_t ea = GetEA(modrm, 0);
4236 READXMM(ea, src);
4237 t1 = XMM(d).d[2];
4238 t2 = XMM(d).d[3];
4239 XMM(d).d[0]=t1;
4240 XMM(d).d[1]=src.d[2];
4241 XMM(d).d[2]=t2;
4242 XMM(d).d[3]=src.d[3];
4243 }
4244 CYCLES(1); // TODO: correct cycle count
4245 }
4246
sse_unpckhpd_r128_rm128()4247 void i386_device::sse_unpckhpd_r128_rm128() // Opcode 66 0f 15
4248 {
4249 uint8_t modrm = FETCH();
4250 int s,d;
4251 s=modrm & 0x7;
4252 d=(modrm >> 3) & 0x7;
4253 if( modrm >= 0xc0 ) {
4254 XMM(d).q[0]=XMM(d).q[1];
4255 XMM(d).q[1]=XMM(s).q[1];
4256 } else {
4257 XMM_REG src;
4258 uint32_t ea = GetEA(modrm, 0);
4259 READXMM(ea, src);
4260 XMM(d).q[0]=XMM(d).q[1];
4261 XMM(d).q[1]=src.q[1];
4262 }
4263 CYCLES(1); // TODO: correct cycle count
4264 }
4265
// "Ordered" predicate for single-precision compares (predicate 7 of the
// compare helpers): true when neither operand is a NaN.
// A value is a NaN exactly when it does not compare equal to itself.
// The previous expression tested op1 against op2 instead of testing op2 for
// NaN, and had the sense inverted, so it reported "ordered" for a NaN op1 and
// "not ordered" for two equal ordinary numbers.
static inline bool sse_issingleordered(float op1, float op2)
{
	return (op1 == op1) && (op2 == op2);
}
4271
// "Unordered" predicate for single-precision compares (predicate 3 of the
// compare helpers): true when at least one operand is a NaN.
// A value is a NaN exactly when it does not compare equal to itself.
// The previous expression tested op1 against op2 instead of testing op2 for
// NaN, and had the sense inverted, so it reported "unordered" for two equal
// ordinary numbers and "not unordered" for a NaN op1.
static inline bool sse_issingleunordered(float op1, float op2)
{
	return (op1 != op1) || (op2 != op2);
}
4277
// "Ordered" predicate for double-precision compares (predicate 7 of the
// compare helpers): true when neither operand is a NaN.
// A value is a NaN exactly when it does not compare equal to itself.
// The previous expression tested op1 against op2 instead of testing op2 for
// NaN, and had the sense inverted, so it reported "ordered" for a NaN op1 and
// "not ordered" for two equal ordinary numbers.
static inline bool sse_isdoubleordered(double op1, double op2)
{
	return (op1 == op1) && (op2 == op2);
}
4283
// "Unordered" predicate for double-precision compares (predicate 3 of the
// compare helpers): true when at least one operand is a NaN.
// A value is a NaN exactly when it does not compare equal to itself.
// The previous expression tested op1 against op2 instead of testing op2 for
// NaN, and had the sense inverted, so it reported "unordered" for two equal
// ordinary numbers and "not unordered" for a NaN op1.
static inline bool sse_isdoubleunordered(double op1, double op2)
{
	return (op1 != op1) || (op2 != op2);
}
4289
sse_predicate_compare_single(uint8_t imm8,XMM_REG d,XMM_REG s)4290 void i386_device::sse_predicate_compare_single(uint8_t imm8, XMM_REG d, XMM_REG s)
4291 {
4292 switch (imm8 & 7)
4293 {
4294 case 0:
4295 d.d[0]=d.f[0] == s.f[0] ? 0xffffffff : 0;
4296 d.d[1]=d.f[1] == s.f[1] ? 0xffffffff : 0;
4297 d.d[2]=d.f[2] == s.f[2] ? 0xffffffff : 0;
4298 d.d[3]=d.f[3] == s.f[3] ? 0xffffffff : 0;
4299 break;
4300 case 1:
4301 d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0;
4302 d.d[1]=d.f[1] < s.f[1] ? 0xffffffff : 0;
4303 d.d[2]=d.f[2] < s.f[2] ? 0xffffffff : 0;
4304 d.d[3]=d.f[3] < s.f[3] ? 0xffffffff : 0;
4305 break;
4306 case 2:
4307 d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0;
4308 d.d[1]=d.f[1] <= s.f[1] ? 0xffffffff : 0;
4309 d.d[2]=d.f[2] <= s.f[2] ? 0xffffffff : 0;
4310 d.d[3]=d.f[3] <= s.f[3] ? 0xffffffff : 0;
4311 break;
4312 case 3:
4313 d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4314 d.d[1]=sse_issingleunordered(d.f[1], s.f[1]) ? 0xffffffff : 0;
4315 d.d[2]=sse_issingleunordered(d.f[2], s.f[2]) ? 0xffffffff : 0;
4316 d.d[3]=sse_issingleunordered(d.f[3], s.f[3]) ? 0xffffffff : 0;
4317 break;
4318 case 4:
4319 d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0;
4320 d.d[1]=d.f[1] != s.f[1] ? 0xffffffff : 0;
4321 d.d[2]=d.f[2] != s.f[2] ? 0xffffffff : 0;
4322 d.d[3]=d.f[3] != s.f[3] ? 0xffffffff : 0;
4323 break;
4324 case 5:
4325 d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff;
4326 d.d[1]=d.f[1] < s.f[1] ? 0 : 0xffffffff;
4327 d.d[2]=d.f[2] < s.f[2] ? 0 : 0xffffffff;
4328 d.d[3]=d.f[3] < s.f[3] ? 0 : 0xffffffff;
4329 break;
4330 case 6:
4331 d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff;
4332 d.d[1]=d.f[1] <= s.f[1] ? 0 : 0xffffffff;
4333 d.d[2]=d.f[2] <= s.f[2] ? 0 : 0xffffffff;
4334 d.d[3]=d.f[3] <= s.f[3] ? 0 : 0xffffffff;
4335 break;
4336 case 7:
4337 d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4338 d.d[1]=sse_issingleordered(d.f[1], s.f[1]) ? 0xffffffff : 0;
4339 d.d[2]=sse_issingleordered(d.f[2], s.f[2]) ? 0xffffffff : 0;
4340 d.d[3]=sse_issingleordered(d.f[3], s.f[3]) ? 0xffffffff : 0;
4341 break;
4342 }
4343 }
4344
sse_predicate_compare_double(uint8_t imm8,XMM_REG d,XMM_REG s)4345 void i386_device::sse_predicate_compare_double(uint8_t imm8, XMM_REG d, XMM_REG s)
4346 {
4347 switch (imm8 & 7)
4348 {
4349 case 0:
4350 d.q[0]=d.f64[0] == s.f64[0] ? 0xffffffffffffffffU : 0;
4351 d.q[1]=d.f64[1] == s.f64[1] ? 0xffffffffffffffffU : 0;
4352 break;
4353 case 1:
4354 d.q[0]=d.f64[0] < s.f64[0] ? 0xffffffffffffffffU : 0;
4355 d.q[1]=d.f64[1] < s.f64[1] ? 0xffffffffffffffffU : 0;
4356 break;
4357 case 2:
4358 d.q[0]=d.f64[0] <= s.f64[0] ? 0xffffffffffffffffU : 0;
4359 d.q[1]=d.f64[1] <= s.f64[1] ? 0xffffffffffffffffU : 0;
4360 break;
4361 case 3:
4362 d.q[0]=sse_isdoubleunordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffffU : 0;
4363 d.q[1]=sse_isdoubleunordered(d.f64[1], s.f64[1]) ? 0xffffffffffffffffU : 0;
4364 break;
4365 case 4:
4366 d.q[0]=d.f64[0] != s.f64[0] ? 0xffffffffffffffffU : 0;
4367 d.q[1]=d.f64[1] != s.f64[1] ? 0xffffffffffffffffU : 0;
4368 break;
4369 case 5:
4370 d.q[0]=d.f64[0] < s.f64[0] ? 0 : 0xffffffffffffffffU;
4371 d.q[1]=d.f64[1] < s.f64[1] ? 0 : 0xffffffffffffffffU;
4372 break;
4373 case 6:
4374 d.q[0]=d.f64[0] <= s.f64[0] ? 0 : 0xffffffffffffffffU;
4375 d.q[1]=d.f64[1] <= s.f64[1] ? 0 : 0xffffffffffffffffU;
4376 break;
4377 case 7:
4378 d.q[0]=sse_isdoubleordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffffU : 0;
4379 d.q[1]=sse_isdoubleordered(d.f64[1], s.f64[1]) ? 0xffffffffffffffffU : 0;
4380 break;
4381 }
4382 }
4383
sse_predicate_compare_single_scalar(uint8_t imm8,XMM_REG d,XMM_REG s)4384 void i386_device::sse_predicate_compare_single_scalar(uint8_t imm8, XMM_REG d, XMM_REG s)
4385 {
4386 switch (imm8 & 7)
4387 {
4388 case 0:
4389 d.d[0]=d.f[0] == s.f[0] ? 0xffffffff : 0;
4390 break;
4391 case 1:
4392 d.d[0]=d.f[0] < s.f[0] ? 0xffffffff : 0;
4393 break;
4394 case 2:
4395 d.d[0]=d.f[0] <= s.f[0] ? 0xffffffff : 0;
4396 break;
4397 case 3:
4398 d.d[0]=sse_issingleunordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4399 break;
4400 case 4:
4401 d.d[0]=d.f[0] != s.f[0] ? 0xffffffff : 0;
4402 break;
4403 case 5:
4404 d.d[0]=d.f[0] < s.f[0] ? 0 : 0xffffffff;
4405 break;
4406 case 6:
4407 d.d[0]=d.f[0] <= s.f[0] ? 0 : 0xffffffff;
4408 break;
4409 case 7:
4410 d.d[0]=sse_issingleordered(d.f[0], s.f[0]) ? 0xffffffff : 0;
4411 break;
4412 }
4413 }
4414
sse_predicate_compare_double_scalar(uint8_t imm8,XMM_REG d,XMM_REG s)4415 void i386_device::sse_predicate_compare_double_scalar(uint8_t imm8, XMM_REG d, XMM_REG s)
4416 {
4417 switch (imm8 & 7)
4418 {
4419 case 0:
4420 d.q[0]=d.f64[0] == s.f64[0] ? 0xffffffffffffffffU : 0;
4421 break;
4422 case 1:
4423 d.q[0]=d.f64[0] < s.f64[0] ? 0xffffffffffffffffU : 0;
4424 break;
4425 case 2:
4426 d.q[0]=d.f64[0] <= s.f64[0] ? 0xffffffffffffffffU : 0;
4427 break;
4428 case 3:
4429 d.q[0]=sse_isdoubleunordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffffU : 0;
4430 break;
4431 case 4:
4432 d.q[0]=d.f64[0] != s.f64[0] ? 0xffffffffffffffffU : 0;
4433 break;
4434 case 5:
4435 d.q[0]=d.f64[0] < s.f64[0] ? 0 : 0xffffffffffffffffU;
4436 break;
4437 case 6:
4438 d.q[0]=d.f64[0] <= s.f64[0] ? 0 : 0xffffffffffffffffU;
4439 break;
4440 case 7:
4441 d.q[0]=sse_isdoubleordered(d.f64[0], s.f64[0]) ? 0xffffffffffffffffU : 0;
4442 break;
4443 }
4444 }
4445
sse_cmpps_r128_rm128_i8()4446 void i386_device::sse_cmpps_r128_rm128_i8() // Opcode 0f c2
4447 {
4448 uint8_t modrm = FETCH();
4449 if( modrm >= 0xc0 ) {
4450 int s,d;
4451 uint8_t imm8 = FETCH();
4452 s=modrm & 0x7;
4453 d=(modrm >> 3) & 0x7;
4454 sse_predicate_compare_single(imm8, XMM(d), XMM(s));
4455 } else {
4456 int d;
4457 XMM_REG s;
4458 uint32_t ea = GetEA(modrm, 0);
4459 uint8_t imm8 = FETCH();
4460 READXMM(ea, s);
4461 d=(modrm >> 3) & 0x7;
4462 sse_predicate_compare_single(imm8, XMM(d), s);
4463 }
4464 CYCLES(1); // TODO: correct cycle count
4465 }
4466
sse_cmppd_r128_rm128_i8()4467 void i386_device::sse_cmppd_r128_rm128_i8() // Opcode 66 0f c2
4468 {
4469 uint8_t modrm = FETCH();
4470 if( modrm >= 0xc0 ) {
4471 int s,d;
4472 uint8_t imm8 = FETCH();
4473 s=modrm & 0x7;
4474 d=(modrm >> 3) & 0x7;
4475 sse_predicate_compare_double(imm8, XMM(d), XMM(s));
4476 } else {
4477 int d;
4478 XMM_REG s;
4479 uint32_t ea = GetEA(modrm, 0);
4480 uint8_t imm8 = FETCH();
4481 READXMM(ea, s);
4482 d=(modrm >> 3) & 0x7;
4483 sse_predicate_compare_double(imm8, XMM(d), s);
4484 }
4485 CYCLES(1); // TODO: correct cycle count
4486 }
4487
sse_cmpss_r128_r128m32_i8()4488 void i386_device::sse_cmpss_r128_r128m32_i8() // Opcode f3 0f c2
4489 {
4490 uint8_t modrm = FETCH();
4491 if( modrm >= 0xc0 ) {
4492 int s,d;
4493 uint8_t imm8 = FETCH();
4494 s=modrm & 0x7;
4495 d=(modrm >> 3) & 0x7;
4496 sse_predicate_compare_single_scalar(imm8, XMM(d), XMM(s));
4497 } else {
4498 int d;
4499 XMM_REG s;
4500 uint32_t ea = GetEA(modrm, 0);
4501 uint8_t imm8 = FETCH();
4502 s.d[0]=READ32(ea);
4503 d=(modrm >> 3) & 0x7;
4504 sse_predicate_compare_single_scalar(imm8, XMM(d), s);
4505 }
4506 CYCLES(1); // TODO: correct cycle count
4507 }
4508
sse_pinsrw_r64_r16m16_i8()4509 void i386_device::sse_pinsrw_r64_r16m16_i8() // Opcode 0f c4, 16bit register
4510 {
4511 MMXPROLOG();
4512 uint8_t modrm = FETCH();
4513 if( modrm >= 0xc0 ) {
4514 uint8_t imm8 = FETCH();
4515 uint16_t v = LOAD_RM16(modrm);
4516 if (m_xmm_operand_size)
4517 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4518 else
4519 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4520 } else {
4521 uint32_t ea = GetEA(modrm, 0);
4522 uint8_t imm8 = FETCH();
4523 uint16_t v = READ16(ea);
4524 if (m_xmm_operand_size)
4525 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4526 else
4527 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4528 }
4529 CYCLES(1); // TODO: correct cycle count
4530 }
4531
sse_pinsrw_r64_r32m16_i8()4532 void i386_device::sse_pinsrw_r64_r32m16_i8() // Opcode 0f c4, 32bit register
4533 {
4534 MMXPROLOG();
4535 uint8_t modrm = FETCH();
4536 if( modrm >= 0xc0 ) {
4537 uint8_t imm8 = FETCH();
4538 uint16_t v = (uint16_t)LOAD_RM32(modrm);
4539 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4540 } else {
4541 uint32_t ea = GetEA(modrm, 0);
4542 uint8_t imm8 = FETCH();
4543 uint16_t v = READ16(ea);
4544 MMX((modrm >> 3) & 0x7).w[imm8 & 3] = v;
4545 }
4546 CYCLES(1); // TODO: correct cycle count
4547 }
4548
sse_pinsrw_r128_r32m16_i8()4549 void i386_device::sse_pinsrw_r128_r32m16_i8() // Opcode 66 0f c4
4550 {
4551 uint8_t modrm = FETCH();
4552 if (modrm >= 0xc0) {
4553 uint8_t imm8 = FETCH();
4554 uint16_t v = (uint16_t)LOAD_RM32(modrm);
4555 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4556 }
4557 else {
4558 uint32_t ea = GetEA(modrm, 0);
4559 uint8_t imm8 = FETCH();
4560 uint16_t v = READ16(ea);
4561 XMM((modrm >> 3) & 0x7).w[imm8 & 7] = v;
4562 }
4563 CYCLES(1); // TODO: correct cycle count
4564 }
4565
sse_pextrw_r16_r64_i8()4566 void i386_device::sse_pextrw_r16_r64_i8() // Opcode 0f c5
4567 {
4568 //MMXPROLOG();
4569 uint8_t modrm = FETCH();
4570 if( modrm >= 0xc0 ) {
4571 uint8_t imm8 = FETCH();
4572 if (m_xmm_operand_size)
4573 STORE_REG16(modrm, XMM(modrm & 0x7).w[imm8 & 7]);
4574 else
4575 STORE_REG16(modrm, MMX(modrm & 0x7).w[imm8 & 3]);
4576 } else {
4577 //uint8_t imm8 = FETCH();
4578 report_invalid_modrm("pextrw_r16_r64_i8", modrm);
4579 }
4580 CYCLES(1); // TODO: correct cycle count
4581 }
4582
sse_pextrw_r32_r64_i8()4583 void i386_device::sse_pextrw_r32_r64_i8() // Opcode 0f c5
4584 {
4585 //MMXPROLOG();
4586 uint8_t modrm = FETCH();
4587 if( modrm >= 0xc0 ) {
4588 uint8_t imm8 = FETCH();
4589 STORE_REG32(modrm, MMX(modrm & 0x7).w[imm8 & 3]);
4590 } else {
4591 //uint8_t imm8 = FETCH();
4592 report_invalid_modrm("pextrw_r32_r64_i8", modrm);
4593 }
4594 CYCLES(1); // TODO: correct cycle count
4595 }
4596
sse_pextrw_reg_r128_i8()4597 void i386_device::sse_pextrw_reg_r128_i8() // Opcode 66 0f c5
4598 {
4599 uint8_t modrm = FETCH();
4600 if (modrm >= 0xc0) {
4601 uint8_t imm8 = FETCH();
4602 STORE_REG32(modrm, XMM(modrm & 0x7).w[imm8 & 7]);
4603 }
4604 else {
4605 //uint8_t imm8 = FETCH();
4606 report_invalid_modrm("sse_pextrw_reg_r128_i8", modrm);
4607 }
4608 CYCLES(1); // TODO: correct cycle count
4609 }
4610
sse_pminub_r64_rm64()4611 void i386_device::sse_pminub_r64_rm64() // Opcode 0f da
4612 {
4613 int n;
4614 MMXPROLOG();
4615 uint8_t modrm = FETCH();
4616 if( modrm >= 0xc0 ) {
4617 for (n=0;n < 8;n++)
4618 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n];
4619 } else {
4620 MMX_REG s;
4621 uint32_t ea = GetEA(modrm, 0);
4622 READMMX(ea, s);
4623 for (n=0;n < 8;n++)
4624 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] < s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n];
4625 }
4626 CYCLES(1); // TODO: correct cycle count
4627 }
4628
sse_pminub_r128_rm128()4629 void i386_device::sse_pminub_r128_rm128() // Opcode 66 0f da
4630 {
4631 uint8_t modrm = FETCH();
4632 if( modrm >= 0xc0 ) {
4633 for (int n=0;n < 16;n++)
4634 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] < XMM(modrm & 0x7).b[n] ? XMM((modrm >> 3) & 0x7).b[n] : XMM(modrm & 0x7).b[n];
4635 } else {
4636 XMM_REG s;
4637 uint32_t ea = GetEA(modrm, 0);
4638 READXMM(ea, s);
4639 for (int n=0;n < 16;n++)
4640 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] < s.b[n] ? XMM((modrm >> 3) & 0x7).b[n] : s.b[n];
4641 }
4642 CYCLES(1); // TODO: correct cycle count
4643 }
4644
sse_pmaxub_r64_rm64()4645 void i386_device::sse_pmaxub_r64_rm64() // Opcode 0f de
4646 {
4647 int n;
4648 MMXPROLOG();
4649 uint8_t modrm = FETCH();
4650 if( modrm >= 0xc0 ) {
4651 for (n=0;n < 8;n++)
4652 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > MMX(modrm & 0x7).b[n] ? MMX((modrm >> 3) & 0x7).b[n] : MMX(modrm & 0x7).b[n];
4653 } else {
4654 MMX_REG s;
4655 uint32_t ea = GetEA(modrm, 0);
4656 READMMX(ea, s);
4657 for (n=0;n < 8;n++)
4658 MMX((modrm >> 3) & 0x7).b[n] = MMX((modrm >> 3) & 0x7).b[n] > s.b[n] ? MMX((modrm >> 3) & 0x7).b[n] : s.b[n];
4659 }
4660 CYCLES(1); // TODO: correct cycle count
4661 }
4662
sse_pavgb_r64_rm64()4663 void i386_device::sse_pavgb_r64_rm64() // Opcode 0f e0
4664 {
4665 int n;
4666 MMXPROLOG();
4667 uint8_t modrm = FETCH();
4668 if( modrm >= 0xc0 ) {
4669 for (n=0;n < 8;n++)
4670 MMX((modrm >> 3) & 0x7).b[n] = ((uint16_t)MMX((modrm >> 3) & 0x7).b[n] + (uint16_t)MMX(modrm & 0x7).b[n] + 1) >> 1;
4671 } else {
4672 MMX_REG s;
4673 uint32_t ea = GetEA(modrm, 0);
4674 READMMX(ea, s);
4675 for (n=0;n < 8;n++)
4676 MMX((modrm >> 3) & 0x7).b[n] = ((uint16_t)MMX((modrm >> 3) & 0x7).b[n] + (uint16_t)s.b[n] + 1) >> 1;
4677 }
4678 CYCLES(1); // TODO: correct cycle count
4679 }
4680
sse_pavgw_r64_rm64()4681 void i386_device::sse_pavgw_r64_rm64() // Opcode 0f e3
4682 {
4683 int n;
4684 MMXPROLOG();
4685 uint8_t modrm = FETCH();
4686 if( modrm >= 0xc0 ) {
4687 for (n=0;n < 4;n++)
4688 MMX((modrm >> 3) & 0x7).w[n] = ((uint32_t)MMX((modrm >> 3) & 0x7).w[n] + (uint32_t)MMX(modrm & 0x7).w[n] + 1) >> 1;
4689 } else {
4690 MMX_REG s;
4691 uint32_t ea = GetEA(modrm, 0);
4692 READMMX(ea, s);
4693 for (n=0;n < 4;n++)
4694 MMX((modrm >> 3) & 0x7).w[n] = ((uint32_t)MMX((modrm >> 3) & 0x7).w[n] + (uint32_t)s.w[n] + 1) >> 1;
4695 }
4696 CYCLES(1); // TODO: correct cycle count
4697 }
4698
sse_pmulhuw_r64_rm64()4699 void i386_device::sse_pmulhuw_r64_rm64() // Opcode 0f e4
4700 {
4701 MMXPROLOG();
4702 uint8_t modrm = FETCH();
4703 if( modrm >= 0xc0 ) {
4704 MMX((modrm >> 3) & 0x7).w[0]=((uint32_t)MMX((modrm >> 3) & 0x7).w[0]*(uint32_t)MMX(modrm & 7).w[0]) >> 16;
4705 MMX((modrm >> 3) & 0x7).w[1]=((uint32_t)MMX((modrm >> 3) & 0x7).w[1]*(uint32_t)MMX(modrm & 7).w[1]) >> 16;
4706 MMX((modrm >> 3) & 0x7).w[2]=((uint32_t)MMX((modrm >> 3) & 0x7).w[2]*(uint32_t)MMX(modrm & 7).w[2]) >> 16;
4707 MMX((modrm >> 3) & 0x7).w[3]=((uint32_t)MMX((modrm >> 3) & 0x7).w[3]*(uint32_t)MMX(modrm & 7).w[3]) >> 16;
4708 } else {
4709 MMX_REG s;
4710 uint32_t ea = GetEA(modrm, 0);
4711 READMMX(ea, s);
4712 MMX((modrm >> 3) & 0x7).w[0]=((uint32_t)MMX((modrm >> 3) & 0x7).w[0]*(uint32_t)s.w[0]) >> 16;
4713 MMX((modrm >> 3) & 0x7).w[1]=((uint32_t)MMX((modrm >> 3) & 0x7).w[1]*(uint32_t)s.w[1]) >> 16;
4714 MMX((modrm >> 3) & 0x7).w[2]=((uint32_t)MMX((modrm >> 3) & 0x7).w[2]*(uint32_t)s.w[2]) >> 16;
4715 MMX((modrm >> 3) & 0x7).w[3]=((uint32_t)MMX((modrm >> 3) & 0x7).w[3]*(uint32_t)s.w[3]) >> 16;
4716 }
4717 CYCLES(1); // TODO: correct cycle count
4718 }
4719
sse_pminsw_r64_rm64()4720 void i386_device::sse_pminsw_r64_rm64() // Opcode 0f ea
4721 {
4722 int n;
4723 MMXPROLOG();
4724 uint8_t modrm = FETCH();
4725 if( modrm >= 0xc0 ) {
4726 for (n=0;n < 4;n++)
4727 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n];
4728 } else {
4729 MMX_REG s;
4730 uint32_t ea = GetEA(modrm, 0);
4731 READMMX(ea, s);
4732 for (n=0;n < 4;n++)
4733 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] < s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n];
4734 }
4735 CYCLES(1); // TODO: correct cycle count
4736 }
4737
sse_pmaxsw_r64_rm64()4738 void i386_device::sse_pmaxsw_r64_rm64() // Opcode 0f ee
4739 {
4740 int n;
4741 MMXPROLOG();
4742 uint8_t modrm = FETCH();
4743 if( modrm >= 0xc0 ) {
4744 for (n=0;n < 4;n++)
4745 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > MMX(modrm & 0x7).s[n] ? MMX((modrm >> 3) & 0x7).s[n] : MMX(modrm & 0x7).s[n];
4746 } else {
4747 MMX_REG s;
4748 uint32_t ea = GetEA(modrm, 0);
4749 READMMX(ea, s);
4750 for (n=0;n < 4;n++)
4751 MMX((modrm >> 3) & 0x7).s[n] = MMX((modrm >> 3) & 0x7).s[n] > s.s[n] ? MMX((modrm >> 3) & 0x7).s[n] : s.s[n];
4752 }
4753 CYCLES(1); // TODO: correct cycle count
4754 }
4755
sse_pmuludq_r64_rm64()4756 void i386_device::sse_pmuludq_r64_rm64() // Opcode 0f f4
4757 {
4758 MMXPROLOG();
4759 uint8_t modrm = FETCH();
4760 if( modrm >= 0xc0 ) {
4761 MMX((modrm >> 3) & 0x7).q = (uint64_t)MMX((modrm >> 3) & 0x7).d[0] * (uint64_t)MMX(modrm & 0x7).d[0];
4762 } else {
4763 MMX_REG s;
4764 uint32_t ea = GetEA(modrm, 0);
4765 READMMX(ea, s);
4766 MMX((modrm >> 3) & 0x7).q = (uint64_t)MMX((modrm >> 3) & 0x7).d[0] * (uint64_t)s.d[0];
4767 }
4768 CYCLES(1); // TODO: correct cycle count
4769 }
4770
sse_pmuludq_r128_rm128()4771 void i386_device::sse_pmuludq_r128_rm128() // Opcode 66 0f f4
4772 {
4773 uint8_t modrm = FETCH();
4774 if( modrm >= 0xc0 ) {
4775 XMM((modrm >> 3) & 0x7).q[0] = (uint64_t)XMM((modrm >> 3) & 0x7).d[0] * (uint64_t)XMM(modrm & 0x7).d[0];
4776 XMM((modrm >> 3) & 0x7).q[1] = (uint64_t)XMM((modrm >> 3) & 0x7).d[2] * (uint64_t)XMM(modrm & 0x7).d[2];
4777 } else {
4778 XMM_REG s;
4779 uint32_t ea = GetEA(modrm, 0);
4780 READXMM(ea, s);
4781 XMM((modrm >> 3) & 0x7).q[0] = (uint64_t)XMM((modrm >> 3) & 0x7).d[0] * (uint64_t)s.d[0];
4782 XMM((modrm >> 3) & 0x7).q[1] = (uint64_t)XMM((modrm >> 3) & 0x7).d[2] * (uint64_t)s.d[2];
4783 }
4784 CYCLES(1); // TODO: correct cycle count
4785 }
4786
sse_psadbw_r64_rm64()4787 void i386_device::sse_psadbw_r64_rm64() // Opcode 0f f6
4788 {
4789 int n;
4790 int32_t temp;
4791 MMXPROLOG();
4792 uint8_t modrm = FETCH();
4793 if( modrm >= 0xc0 ) {
4794 temp=0;
4795 for (n=0;n < 8;n++)
4796 temp += abs((int32_t)MMX((modrm >> 3) & 0x7).b[n] - (int32_t)MMX(modrm & 0x7).b[n]);
4797 MMX((modrm >> 3) & 0x7).l=(uint64_t)temp & 0xffff;
4798 } else {
4799 MMX_REG s;
4800 uint32_t ea = GetEA(modrm, 0);
4801 READMMX(ea, s);
4802 temp=0;
4803 for (n=0;n < 8;n++)
4804 temp += abs((int32_t)MMX((modrm >> 3) & 0x7).b[n] - (int32_t)s.b[n]);
4805 MMX((modrm >> 3) & 0x7).l=(uint64_t)temp & 0xffff;
4806 }
4807 CYCLES(1); // TODO: correct cycle count
4808 }
4809
sse_psubq_r64_rm64()4810 void i386_device::sse_psubq_r64_rm64() // Opcode 0f fb
4811 {
4812 MMXPROLOG();
4813 uint8_t modrm = FETCH();
4814 if( modrm >= 0xc0 ) {
4815 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - MMX(modrm & 7).q;
4816 } else {
4817 MMX_REG s;
4818 uint32_t ea = GetEA(modrm, 0);
4819 READMMX(ea, s);
4820 MMX((modrm >> 3) & 0x7).q=MMX((modrm >> 3) & 0x7).q - s.q;
4821 }
4822 CYCLES(1); // TODO: correct cycle count
4823 }
4824
sse_psubq_r128_rm128()4825 void i386_device::sse_psubq_r128_rm128() // Opcode 66 0f fb
4826 {
4827 uint8_t modrm = FETCH();
4828 if( modrm >= 0xc0 ) {
4829 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] - XMM(modrm & 7).q[0];
4830 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] - XMM(modrm & 7).q[1];
4831 } else {
4832 XMM_REG s;
4833 uint32_t ea = GetEA(modrm, 0);
4834 READXMM(ea, s);
4835 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] - s.q[0];
4836 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] - s.q[1];
4837 }
4838 CYCLES(1); // TODO: correct cycle count
4839 }
4840
sse_pshufd_r128_rm128_i8()4841 void i386_device::sse_pshufd_r128_rm128_i8() // Opcode 66 0f 70
4842 {
4843 uint8_t modrm = FETCH();
4844 if( modrm >= 0xc0 ) {
4845 XMM_REG t;
4846 int s,d;
4847 uint8_t imm8 = FETCH();
4848 s=modrm & 0x7;
4849 d=(modrm >> 3) & 0x7;
4850 t.q[0]=XMM(s).q[0];
4851 t.q[1]=XMM(s).q[1];
4852 XMM(d).d[0]=t.d[imm8 & 3];
4853 XMM(d).d[1]=t.d[(imm8 >> 2) & 3];
4854 XMM(d).d[2]=t.d[(imm8 >> 4) & 3];
4855 XMM(d).d[3]=t.d[(imm8 >> 6) & 3];
4856 } else {
4857 XMM_REG s;
4858 int d=(modrm >> 3) & 0x7;
4859 uint32_t ea = GetEA(modrm, 0);
4860 uint8_t imm8 = FETCH();
4861 READXMM(ea, s);
4862 XMM(d).d[0]=s.d[(imm8 & 3)];
4863 XMM(d).d[1]=s.d[((imm8 >> 2) & 3)];
4864 XMM(d).d[2]=s.d[((imm8 >> 4) & 3)];
4865 XMM(d).d[3]=s.d[((imm8 >> 6) & 3)];
4866 }
4867 CYCLES(1); // TODO: correct cycle count
4868 }
4869
sse_pshuflw_r128_rm128_i8()4870 void i386_device::sse_pshuflw_r128_rm128_i8() // Opcode f2 0f 70
4871 {
4872 uint8_t modrm = FETCH();
4873 if( modrm >= 0xc0 ) {
4874 XMM_REG t;
4875 int s,d;
4876 uint8_t imm8 = FETCH();
4877 s=modrm & 0x7;
4878 d=(modrm >> 3) & 0x7;
4879 t.q[0]=XMM(s).q[0];
4880 XMM(d).q[1]=XMM(s).q[1];
4881 XMM(d).w[0]=t.w[imm8 & 3];
4882 XMM(d).w[1]=t.w[(imm8 >> 2) & 3];
4883 XMM(d).w[2]=t.w[(imm8 >> 4) & 3];
4884 XMM(d).w[3]=t.w[(imm8 >> 6) & 3];
4885 } else {
4886 XMM_REG s;
4887 int d=(modrm >> 3) & 0x7;
4888 uint32_t ea = GetEA(modrm, 0);
4889 uint8_t imm8 = FETCH();
4890 READXMM(ea, s);
4891 XMM(d).q[1]=s.q[1];
4892 XMM(d).w[0]=s.w[imm8 & 3];
4893 XMM(d).w[1]=s.w[(imm8 >> 2) & 3];
4894 XMM(d).w[2]=s.w[(imm8 >> 4) & 3];
4895 XMM(d).w[3]=s.w[(imm8 >> 6) & 3];
4896 }
4897 CYCLES(1); // TODO: correct cycle count
4898 }
4899
sse_pshufhw_r128_rm128_i8()4900 void i386_device::sse_pshufhw_r128_rm128_i8() // Opcode f3 0f 70
4901 {
4902 uint8_t modrm = FETCH();
4903 if( modrm >= 0xc0 ) {
4904 XMM_REG t;
4905 int s,d;
4906 uint8_t imm8 = FETCH();
4907 s=modrm & 0x7;
4908 d=(modrm >> 3) & 0x7;
4909 t.q[0]=XMM(s).q[1];
4910 XMM(d).q[0]=XMM(s).q[0];
4911 XMM(d).w[4]=t.w[imm8 & 3];
4912 XMM(d).w[5]=t.w[(imm8 >> 2) & 3];
4913 XMM(d).w[6]=t.w[(imm8 >> 4) & 3];
4914 XMM(d).w[7]=t.w[(imm8 >> 6) & 3];
4915 } else {
4916 XMM_REG s;
4917 int d=(modrm >> 3) & 0x7;
4918 uint32_t ea = GetEA(modrm, 0);
4919 uint8_t imm8 = FETCH();
4920 READXMM(ea, s);
4921 XMM(d).q[0]=s.q[0];
4922 XMM(d).w[4]=s.w[4 + (imm8 & 3)];
4923 XMM(d).w[5]=s.w[4 + ((imm8 >> 2) & 3)];
4924 XMM(d).w[6]=s.w[4 + ((imm8 >> 4) & 3)];
4925 XMM(d).w[7]=s.w[4 + ((imm8 >> 6) & 3)];
4926 }
4927 CYCLES(1); // TODO: correct cycle count
4928 }
4929
sse_packsswb_r128_rm128()4930 void i386_device::sse_packsswb_r128_rm128() // Opcode 66 0f 63
4931 {
4932 uint8_t modrm = FETCH();
4933 if (modrm >= 0xc0) {
4934 XMM_REG t;
4935 int s, d;
4936 s = modrm & 0x7;
4937 d = (modrm >> 3) & 0x7;
4938 t.q[0] = XMM(s).q[0];
4939 t.q[1] = XMM(s).q[1];
4940 for (int n = 0; n < 8; n++)
4941 XMM(d).c[n] = SaturatedSignedWordToSignedByte(XMM(d).s[n]);
4942 for (int n = 0; n < 8; n++)
4943 XMM(d).c[n+8] = SaturatedSignedWordToSignedByte(t.s[n]);
4944 }
4945 else {
4946 XMM_REG s;
4947 int d = (modrm >> 3) & 0x7;
4948 uint32_t ea = GetEA(modrm, 0);
4949 READXMM(ea, s);
4950 for (int n = 0; n < 8; n++)
4951 XMM(d).c[n] = SaturatedSignedWordToSignedByte(XMM(d).s[n]);
4952 for (int n = 0; n < 8; n++)
4953 XMM(d).c[n + 8] = SaturatedSignedWordToSignedByte(s.s[n]);
4954 }
4955 CYCLES(1); // TODO: correct cycle count
4956 }
4957
sse_packssdw_r128_rm128()4958 void i386_device::sse_packssdw_r128_rm128() // Opcode 66 0f 6b
4959 {
4960 uint8_t modrm = FETCH();
4961 if (modrm >= 0xc0) {
4962 XMM_REG t;
4963 int s, d;
4964 s = modrm & 0x7;
4965 d = (modrm >> 3) & 0x7;
4966 t.q[0] = XMM(s).q[0];
4967 t.q[1] = XMM(s).q[1];
4968 XMM(d).s[0] = SaturatedSignedDwordToSignedWord(XMM(d).i[0]);
4969 XMM(d).s[1] = SaturatedSignedDwordToSignedWord(XMM(d).i[1]);
4970 XMM(d).s[2] = SaturatedSignedDwordToSignedWord(XMM(d).i[2]);
4971 XMM(d).s[3] = SaturatedSignedDwordToSignedWord(XMM(d).i[3]);
4972 XMM(d).s[4] = SaturatedSignedDwordToSignedWord(t.i[0]);
4973 XMM(d).s[5] = SaturatedSignedDwordToSignedWord(t.i[1]);
4974 XMM(d).s[6] = SaturatedSignedDwordToSignedWord(t.i[2]);
4975 XMM(d).s[7] = SaturatedSignedDwordToSignedWord(t.i[3]);
4976 }
4977 else {
4978 XMM_REG s;
4979 int d = (modrm >> 3) & 0x7;
4980 uint32_t ea = GetEA(modrm, 0);
4981 READXMM(ea, s);
4982 XMM(d).s[0] = SaturatedSignedDwordToSignedWord(XMM(d).i[0]);
4983 XMM(d).s[1] = SaturatedSignedDwordToSignedWord(XMM(d).i[1]);
4984 XMM(d).s[2] = SaturatedSignedDwordToSignedWord(XMM(d).i[2]);
4985 XMM(d).s[3] = SaturatedSignedDwordToSignedWord(XMM(d).i[3]);
4986 XMM(d).s[4] = SaturatedSignedDwordToSignedWord(s.i[0]);
4987 XMM(d).s[5] = SaturatedSignedDwordToSignedWord(s.i[1]);
4988 XMM(d).s[6] = SaturatedSignedDwordToSignedWord(s.i[2]);
4989 XMM(d).s[7] = SaturatedSignedDwordToSignedWord(s.i[3]);
4990 }
4991 CYCLES(1); // TODO: correct cycle count
4992 }
4993
sse_pcmpgtb_r128_rm128()4994 void i386_device::sse_pcmpgtb_r128_rm128() // Opcode 66 0f 64
4995 {
4996 uint8_t modrm = FETCH();
4997 if( modrm >= 0xc0 ) {
4998 int s,d;
4999 s=modrm & 0x7;
5000 d=(modrm >> 3) & 0x7;
5001 for (int c=0;c <= 15;c++)
5002 XMM(d).b[c]=(XMM(d).c[c] > XMM(s).c[c]) ? 0xff : 0;
5003 } else {
5004 XMM_REG s;
5005 int d=(modrm >> 3) & 0x7;
5006 uint32_t ea = GetEA(modrm, 0);
5007 READXMM(ea, s);
5008 for (int c=0;c <= 15;c++)
5009 XMM(d).b[c]=(XMM(d).c[c] > s.c[c]) ? 0xff : 0;
5010 }
5011 CYCLES(1); // TODO: correct cycle count
5012 }
5013
sse_pcmpgtw_r128_rm128()5014 void i386_device::sse_pcmpgtw_r128_rm128() // Opcode 66 0f 65
5015 {
5016 uint8_t modrm = FETCH();
5017 if( modrm >= 0xc0 ) {
5018 int s,d;
5019 s=modrm & 0x7;
5020 d=(modrm >> 3) & 0x7;
5021 for (int c=0;c <= 7;c++)
5022 XMM(d).w[c]=(XMM(d).s[c] > XMM(s).s[c]) ? 0xffff : 0;
5023 } else {
5024 XMM_REG s;
5025 int d=(modrm >> 3) & 0x7;
5026 uint32_t ea = GetEA(modrm, 0);
5027 READXMM(ea, s);
5028 for (int c=0;c <= 7;c++)
5029 XMM(d).w[c]=(XMM(d).s[c] > s.s[c]) ? 0xffff : 0;
5030 }
5031 CYCLES(1); // TODO: correct cycle count
5032 }
5033
sse_pcmpgtd_r128_rm128()5034 void i386_device::sse_pcmpgtd_r128_rm128() // Opcode 66 0f 66
5035 {
5036 uint8_t modrm = FETCH();
5037 if( modrm >= 0xc0 ) {
5038 int s,d;
5039 s=modrm & 0x7;
5040 d=(modrm >> 3) & 0x7;
5041 for (int c=0;c <= 3;c++)
5042 XMM(d).d[c]=(XMM(d).i[c] > XMM(s).i[c]) ? 0xffffffff : 0;
5043 } else {
5044 XMM_REG s;
5045 int d=(modrm >> 3) & 0x7;
5046 uint32_t ea = GetEA(modrm, 0);
5047 READXMM(ea, s);
5048 for (int c=0;c <= 3;c++)
5049 XMM(d).d[c]=(XMM(d).i[c] > s.i[c]) ? 0xffffffff : 0;
5050 }
5051 CYCLES(1); // TODO: correct cycle count
5052 }
5053
sse_packuswb_r128_rm128()5054 void i386_device::sse_packuswb_r128_rm128() // Opcode 66 0f 67
5055 {
5056 uint8_t modrm = FETCH();
5057 if( modrm >= 0xc0 ) {
5058 XMM_REG t;
5059 int s,d;
5060 s=modrm & 0x7;
5061 d=(modrm >> 3) & 0x7;
5062 t.q[0] = XMM(s).q[0];
5063 t.q[1] = XMM(s).q[1];
5064 for (int n = 0; n < 8;n++)
5065 XMM(d).b[n]=SaturatedSignedWordToUnsignedByte(XMM(d).s[n]);
5066 for (int n = 0; n < 8;n++)
5067 XMM(d).b[n+8]=SaturatedSignedWordToUnsignedByte(t.s[n]);
5068 } else {
5069 XMM_REG s;
5070 int d=(modrm >> 3) & 0x7;
5071 uint32_t ea = GetEA(modrm, 0);
5072 READXMM(ea, s);
5073 for (int n = 0; n < 8;n++)
5074 XMM(d).b[n]=SaturatedSignedWordToUnsignedByte(XMM(d).s[n]);
5075 for (int n = 0; n < 8;n++)
5076 XMM(d).b[n+8]=SaturatedSignedWordToUnsignedByte(s.s[n]);
5077 }
5078 CYCLES(1); // TODO: correct cycle count
5079 }
5080
sse_punpckhbw_r128_rm128()5081 void i386_device::sse_punpckhbw_r128_rm128() // Opcode 66 0f 68
5082 {
5083 uint8_t modrm = FETCH();
5084 if( modrm >= 0xc0 ) {
5085 XMM_REG t;
5086 int s,d;
5087 s=modrm & 0x7;
5088 d=(modrm >> 3) & 0x7;
5089 t.q[1] = XMM(s).q[1];
5090 for (int n = 0; n < 16; n += 2) {
5091 XMM(d).b[n]=XMM(d).b[8+(n >> 1)];
5092 XMM(d).b[n+1]=t.b[8+(n >> 1)];
5093 }
5094 } else {
5095 XMM_REG s;
5096 int d=(modrm >> 3) & 0x7;
5097 uint32_t ea = GetEA(modrm, 0);
5098 READXMM(ea, s);
5099 for (int n = 0; n < 16; n += 2) {
5100 XMM(d).b[n]=XMM(d).b[8+(n >> 1)];
5101 XMM(d).b[n+1]=s.b[8+(n >> 1)];
5102 }
5103 }
5104 CYCLES(1); // TODO: correct cycle count
5105 }
5106
sse_punpckhwd_r128_rm128()5107 void i386_device::sse_punpckhwd_r128_rm128() // Opcode 66 0f 69
5108 {
5109 uint8_t modrm = FETCH();
5110 if( modrm >= 0xc0 ) {
5111 XMM_REG t;
5112 int s,d;
5113 s=modrm & 0x7;
5114 d=(modrm >> 3) & 0x7;
5115 t.q[1] = XMM(s).q[1];
5116 for (int n = 0; n < 8; n += 2) {
5117 XMM(d).w[n]=XMM(d).w[4+(n >> 1)];
5118 XMM(d).w[n+1]=t.w[4+(n >> 1)];
5119 }
5120 } else {
5121 XMM_REG s;
5122 int d=(modrm >> 3) & 0x7;
5123 uint32_t ea = GetEA(modrm, 0);
5124 READXMM(ea, s);
5125 for (int n = 0; n < 8; n += 2) {
5126 XMM(d).w[n]=XMM(d).w[4+(n >> 1)];
5127 XMM(d).w[n+1]=s.w[4+(n >> 1)];
5128 }
5129 }
5130 CYCLES(1); // TODO: correct cycle count
5131 }
5132
sse_unpckhdq_r128_rm128()5133 void i386_device::sse_unpckhdq_r128_rm128() // Opcode 66 0f 6a
5134 {
5135 uint8_t modrm = FETCH();
5136 if( modrm >= 0xc0 ) {
5137 XMM_REG t;
5138 int s,d;
5139 s=modrm & 0x7;
5140 d=(modrm >> 3) & 0x7;
5141 t.q[1] = XMM(s).q[1];
5142 XMM(d).d[0]=XMM(d).d[2];
5143 XMM(d).d[1]=t.d[2];
5144 XMM(d).d[2]=XMM(d).d[3];
5145 XMM(d).d[3]=t.d[3];
5146 } else {
5147 XMM_REG s;
5148 int d=(modrm >> 3) & 0x7;
5149 uint32_t ea = GetEA(modrm, 0);
5150 READXMM(ea, s);
5151 XMM(d).d[0]=XMM(d).d[2];
5152 XMM(d).d[1]=s.d[2];
5153 XMM(d).d[2]=XMM(d).d[3];
5154 XMM(d).d[3]=s.d[3];
5155 }
5156 CYCLES(1); // TODO: correct cycle count
5157 }
5158
sse_punpckhqdq_r128_rm128()5159 void i386_device::sse_punpckhqdq_r128_rm128() // Opcode 66 0f 6d
5160 {
5161 uint8_t modrm = FETCH();
5162 if( modrm >= 0xc0 ) {
5163 XMM_REG t;
5164 int s,d;
5165 s=modrm & 0x7;
5166 d=(modrm >> 3) & 0x7;
5167 t.q[1] = XMM(s).q[1];
5168 XMM(d).q[0]=XMM(d).q[1];
5169 XMM(d).q[1]=t.q[1];
5170 } else {
5171 XMM_REG s;
5172 int d=(modrm >> 3) & 0x7;
5173 uint32_t ea = GetEA(modrm, 0);
5174 READXMM(ea, s);
5175 XMM(d).q[0]=XMM(d).q[1];
5176 XMM(d).q[1]=s.q[1];
5177 }
5178 CYCLES(1); // TODO: correct cycle count
5179 }
5180
sse_pcmpeqb_r128_rm128()5181 void i386_device::sse_pcmpeqb_r128_rm128() // Opcode 66 0f 74
5182 {
5183 uint8_t modrm = FETCH();
5184 if( modrm >= 0xc0 ) {
5185 int s,d;
5186 s=modrm & 0x7;
5187 d=(modrm >> 3) & 0x7;
5188 for (int c=0;c <= 15;c++)
5189 XMM(d).b[c]=(XMM(d).c[c] == XMM(s).c[c]) ? 0xff : 0;
5190 } else {
5191 XMM_REG s;
5192 int d=(modrm >> 3) & 0x7;
5193 uint32_t ea = GetEA(modrm, 0);
5194 READXMM(ea, s);
5195 for (int c=0;c <= 15;c++)
5196 XMM(d).b[c]=(XMM(d).c[c] == s.c[c]) ? 0xff : 0;
5197 }
5198 CYCLES(1); // TODO: correct cycle count
5199 }
5200
sse_pcmpeqw_r128_rm128()5201 void i386_device::sse_pcmpeqw_r128_rm128() // Opcode 66 0f 75
5202 {
5203 uint8_t modrm = FETCH();
5204 if( modrm >= 0xc0 ) {
5205 int s,d;
5206 s=modrm & 0x7;
5207 d=(modrm >> 3) & 0x7;
5208 for (int c=0;c <= 7;c++)
5209 XMM(d).w[c]=(XMM(d).s[c] == XMM(s).s[c]) ? 0xffff : 0;
5210 } else {
5211 XMM_REG s;
5212 int d=(modrm >> 3) & 0x7;
5213 uint32_t ea = GetEA(modrm, 0);
5214 READXMM(ea, s);
5215 for (int c=0;c <= 7;c++)
5216 XMM(d).w[c]=(XMM(d).s[c] == s.s[c]) ? 0xffff : 0;
5217 }
5218 CYCLES(1); // TODO: correct cycle count
5219 }
5220
sse_pcmpeqd_r128_rm128()5221 void i386_device::sse_pcmpeqd_r128_rm128() // Opcode 66 0f 76
5222 {
5223 uint8_t modrm = FETCH();
5224 if( modrm >= 0xc0 ) {
5225 int s,d;
5226 s=modrm & 0x7;
5227 d=(modrm >> 3) & 0x7;
5228 for (int c=0;c <= 3;c++)
5229 XMM(d).d[c]=(XMM(d).i[c] == XMM(s).i[c]) ? 0xffffffff : 0;
5230 } else {
5231 XMM_REG s;
5232 int d=(modrm >> 3) & 0x7;
5233 uint32_t ea = GetEA(modrm, 0);
5234 READXMM(ea, s);
5235 for (int c=0;c <= 3;c++)
5236 XMM(d).d[c]=(XMM(d).i[c] == s.i[c]) ? 0xffffffff : 0;
5237 }
5238 CYCLES(1); // TODO: correct cycle count
5239 }
5240
sse_paddq_r128_rm128()5241 void i386_device::sse_paddq_r128_rm128() // Opcode 66 0f d4
5242 {
5243 uint8_t modrm = FETCH();
5244 if( modrm >= 0xc0 ) {
5245 int s,d;
5246 s=modrm & 0x7;
5247 d=(modrm >> 3) & 0x7;
5248 XMM(d).q[0]=XMM(d).q[0]+XMM(s).q[0];
5249 XMM(d).q[1]=XMM(d).q[1]+XMM(s).q[1];
5250 } else {
5251 XMM_REG src;
5252 int d=(modrm >> 3) & 0x7;
5253 uint32_t ea = GetEA(modrm, 0);
5254 READXMM(ea, src);
5255 XMM(d).q[0]=XMM(d).q[0]+src.q[0];
5256 XMM(d).q[1]=XMM(d).q[1]+src.q[1];
5257 }
5258 CYCLES(1); // TODO: correct cycle count
5259 }
5260
sse_pmullw_r128_rm128()5261 void i386_device::sse_pmullw_r128_rm128() // Opcode 66 0f d5
5262 {
5263 uint8_t modrm = FETCH();
5264 if( modrm >= 0xc0 ) {
5265 int s,d;
5266 s=modrm & 0x7;
5267 d=(modrm >> 3) & 0x7;
5268 for (int n = 0; n < 8;n++)
5269 XMM(d).w[n]=(uint32_t)((int32_t)XMM(d).s[n]*(int32_t)XMM(s).s[n]) & 0xffff;
5270 } else {
5271 XMM_REG src;
5272 int d;
5273 uint32_t ea = GetEA(modrm, 0);
5274 READXMM(ea, src);
5275 d=(modrm >> 3) & 0x7;
5276 for (int n = 0; n < 8;n++)
5277 XMM(d).w[n]=(uint32_t)((int32_t)XMM(d).s[n]*(int32_t)src.s[n]) & 0xffff;
5278 }
5279 CYCLES(1); // TODO: correct cycle count
5280 }
5281
sse_paddb_r128_rm128()5282 void i386_device::sse_paddb_r128_rm128() // Opcode 66 0f fc
5283 {
5284 uint8_t modrm = FETCH();
5285 if( modrm >= 0xc0 ) {
5286 for (int n=0;n < 16;n++)
5287 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] + XMM(modrm & 7).b[n];
5288 } else {
5289 XMM_REG s;
5290 uint32_t ea = GetEA(modrm, 0);
5291 READXMM(ea, s);
5292 for (int n=0;n < 16;n++)
5293 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] + s.b[n];
5294 }
5295 CYCLES(1); // TODO: correct cycle count
5296 }
5297
sse_paddw_r128_rm128()5298 void i386_device::sse_paddw_r128_rm128() // Opcode 66 0f fd
5299 {
5300 uint8_t modrm = FETCH();
5301 if( modrm >= 0xc0 ) {
5302 for (int n=0;n < 8;n++)
5303 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] + XMM(modrm & 7).w[n];
5304 } else {
5305 XMM_REG s;
5306 uint32_t ea = GetEA(modrm, 0);
5307 READXMM(ea, s);
5308 for (int n=0;n < 8;n++)
5309 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] + s.w[n];
5310 }
5311 CYCLES(1); // TODO: correct cycle count
5312 }
5313
sse_paddd_r128_rm128()5314 void i386_device::sse_paddd_r128_rm128() // Opcode 66 0f fe
5315 {
5316 uint8_t modrm = FETCH();
5317 if( modrm >= 0xc0 ) {
5318 for (int n=0;n < 4;n++)
5319 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] + XMM(modrm & 7).d[n];
5320 } else {
5321 XMM_REG s;
5322 uint32_t ea = GetEA(modrm, 0);
5323 READXMM(ea, s);
5324 for (int n=0;n < 4;n++)
5325 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] + s.d[n];
5326 }
5327 CYCLES(1); // TODO: correct cycle count
5328 }
5329
sse_psubusb_r128_rm128()5330 void i386_device::sse_psubusb_r128_rm128() // Opcode 66 0f d8
5331 {
5332 uint8_t modrm = FETCH();
5333 if( modrm >= 0xc0 ) {
5334 for (int n=0;n < 16;n++)
5335 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] < XMM(modrm & 7).b[n] ? 0 : XMM((modrm >> 3) & 0x7).b[n]-XMM(modrm & 7).b[n];
5336 } else {
5337 XMM_REG src;
5338 uint32_t ea = GetEA(modrm, 0);
5339 READXMM(ea, src);
5340 for (int n=0;n < 16;n++)
5341 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] < src.b[n] ? 0 : XMM((modrm >> 3) & 0x7).b[n]-src.b[n];
5342 }
5343 CYCLES(1); // TODO: correct cycle count
5344 }
5345
sse_psubusw_r128_rm128()5346 void i386_device::sse_psubusw_r128_rm128() // Opcode 66 0f d9
5347 {
5348 uint8_t modrm = FETCH();
5349 if( modrm >= 0xc0 ) {
5350 for (int n=0;n < 8;n++)
5351 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] < XMM(modrm & 7).w[n] ? 0 : XMM((modrm >> 3) & 0x7).w[n]-XMM(modrm & 7).w[n];
5352 } else {
5353 XMM_REG src;
5354 uint32_t ea = GetEA(modrm, 0);
5355 READXMM(ea, src);
5356 for (int n=0;n < 8;n++)
5357 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] < src.w[n] ? 0 : XMM((modrm >> 3) & 0x7).w[n]-src.w[n];
5358 }
5359 CYCLES(1); // TODO: correct cycle count
5360 }
5361
sse_pand_r128_rm128()5362 void i386_device::sse_pand_r128_rm128() // Opcode 66 0f db
5363 {
5364 uint8_t modrm = FETCH();
5365 if( modrm >= 0xc0 ) {
5366 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] & XMM(modrm & 7).q[0];
5367 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] & XMM(modrm & 7).q[1];
5368 } else {
5369 XMM_REG src;
5370 uint32_t ea = GetEA(modrm, 0);
5371 READXMM(ea, src);
5372 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] & src.q[0];
5373 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] & src.q[1];
5374 }
5375 CYCLES(1); // TODO: correct cycle count
5376 }
5377
sse_pandn_r128_rm128()5378 void i386_device::sse_pandn_r128_rm128() // Opcode 66 0f df
5379 {
5380 uint8_t modrm = FETCH();
5381 if( modrm >= 0xc0 ) {
5382 XMM((modrm >> 3) & 0x7).q[0]=(~XMM((modrm >> 3) & 0x7).q[0]) & XMM(modrm & 7).q[0];
5383 XMM((modrm >> 3) & 0x7).q[1]=(~XMM((modrm >> 3) & 0x7).q[1]) & XMM(modrm & 7).q[1];
5384 } else {
5385 XMM_REG src;
5386 uint32_t ea = GetEA(modrm, 0);
5387 READXMM(ea, src);
5388 XMM((modrm >> 3) & 0x7).q[0]=(~XMM((modrm >> 3) & 0x7).q[0]) & src.q[0];
5389 XMM((modrm >> 3) & 0x7).q[1]=(~XMM((modrm >> 3) & 0x7).q[1]) & src.q[1];
5390 }
5391 CYCLES(1); // TODO: correct cycle count
5392 }
5393
sse_paddusb_r128_rm128()5394 void i386_device::sse_paddusb_r128_rm128() // Opcode 66 0f dc
5395 {
5396 uint8_t modrm = FETCH();
5397 if( modrm >= 0xc0 ) {
5398 for (int n=0;n < 16;n++)
5399 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] > (0xff-XMM(modrm & 7).b[n]) ? 0xff : XMM((modrm >> 3) & 0x7).b[n]+XMM(modrm & 7).b[n];
5400 } else {
5401 XMM_REG src;
5402 uint32_t ea = GetEA(modrm, 0);
5403 READXMM(ea, src);
5404 for (int n=0;n < 16;n++)
5405 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] > (0xff-src.b[n]) ? 0xff : XMM((modrm >> 3) & 0x7).b[n]+src.b[n];
5406 }
5407 CYCLES(1); // TODO: correct cycle count
5408 }
5409
sse_paddusw_r128_rm128()5410 void i386_device::sse_paddusw_r128_rm128() // Opcode 66 0f dd
5411 {
5412 uint8_t modrm = FETCH();
5413 if( modrm >= 0xc0 ) {
5414 for (int n=0;n < 8;n++)
5415 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] > (0xffff-XMM(modrm & 7).w[n]) ? 0xffff : XMM((modrm >> 3) & 0x7).w[n]+XMM(modrm & 7).w[n];
5416 } else {
5417 XMM_REG src;
5418 uint32_t ea = GetEA(modrm, 0);
5419 READXMM(ea, src);
5420 for (int n=0;n < 8;n++)
5421 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] > (0xffff-src.w[n]) ? 0xffff : XMM((modrm >> 3) & 0x7).w[n]+src.w[n];
5422 }
5423 CYCLES(1); // TODO: correct cycle count
5424 }
5425
sse_pmaxub_r128_rm128()5426 void i386_device::sse_pmaxub_r128_rm128() // Opcode 66 0f de
5427 {
5428 uint8_t modrm = FETCH();
5429 if( modrm >= 0xc0 ) {
5430 for (int n=0;n < 16;n++)
5431 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] > XMM(modrm & 0x7).b[n] ? XMM((modrm >> 3) & 0x7).b[n] : XMM(modrm & 0x7).b[n];
5432 } else {
5433 XMM_REG s;
5434 uint32_t ea = GetEA(modrm, 0);
5435 READXMM(ea, s);
5436 for (int n=0;n < 16;n++)
5437 XMM((modrm >> 3) & 0x7).b[n] = XMM((modrm >> 3) & 0x7).b[n] > s.b[n] ? XMM((modrm >> 3) & 0x7).b[n] : s.b[n];
5438 }
5439 CYCLES(1); // TODO: correct cycle count
5440 }
5441
sse_pmulhuw_r128_rm128()5442 void i386_device::sse_pmulhuw_r128_rm128() // Opcode 66 0f e4
5443 {
5444 uint8_t modrm = FETCH();
5445 if( modrm >= 0xc0 ) {
5446 for (int n=0;n < 8;n++)
5447 XMM((modrm >> 3) & 0x7).w[n]=((uint32_t)XMM((modrm >> 3) & 0x7).w[n]*(uint32_t)XMM(modrm & 7).w[n]) >> 16;
5448 } else {
5449 XMM_REG s;
5450 uint32_t ea = GetEA(modrm, 0);
5451 READXMM(ea, s);
5452 for (int n=0;n < 8;n++)
5453 XMM((modrm >> 3) & 0x7).w[n]=((uint32_t)XMM((modrm >> 3) & 0x7).w[n]*(uint32_t)s.w[n]) >> 16;
5454 }
5455 CYCLES(1); // TODO: correct cycle count
5456 }
5457
sse_pmulhw_r128_rm128()5458 void i386_device::sse_pmulhw_r128_rm128() // Opcode 66 0f e5
5459 {
5460 uint8_t modrm = FETCH();
5461 if( modrm >= 0xc0 ) {
5462 for (int n=0;n < 8;n++)
5463 XMM((modrm >> 3) & 0x7).w[n]=(uint32_t)((int32_t)XMM((modrm >> 3) & 0x7).s[n]*(int32_t)XMM(modrm & 7).s[n]) >> 16;
5464 } else {
5465 XMM_REG src;
5466 uint32_t ea = GetEA(modrm, 0);
5467 READXMM(ea, src);
5468 for (int n=0;n < 8;n++)
5469 XMM((modrm >> 3) & 0x7).w[n]=(uint32_t)((int32_t)XMM((modrm >> 3) & 0x7).s[n]*(int32_t)src.s[n]) >> 16;
5470 }
5471 CYCLES(1); // TODO: correct cycle count
5472 }
5473
sse_psubsb_r128_rm128()5474 void i386_device::sse_psubsb_r128_rm128() // Opcode 66 0f e8
5475 {
5476 uint8_t modrm = FETCH();
5477 if( modrm >= 0xc0 ) {
5478 for (int n=0;n < 16;n++)
5479 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((int16_t)XMM((modrm >> 3) & 0x7).c[n] - (int16_t)XMM(modrm & 7).c[n]);
5480 } else {
5481 XMM_REG s;
5482 uint32_t ea = GetEA(modrm, 0);
5483 READXMM(ea, s);
5484 for (int n=0;n < 16;n++)
5485 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((int16_t)XMM((modrm >> 3) & 0x7).c[n] - (int16_t)s.c[n]);
5486 }
5487 CYCLES(1); // TODO: correct cycle count
5488 }
5489
sse_psubsw_r128_rm128()5490 void i386_device::sse_psubsw_r128_rm128() // Opcode 66 0f e9
5491 {
5492 uint8_t modrm = FETCH();
5493 if( modrm >= 0xc0 ) {
5494 for (int n=0;n < 8;n++)
5495 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((int32_t)XMM((modrm >> 3) & 0x7).s[n] - (int32_t)XMM(modrm & 7).s[n]);
5496 } else {
5497 XMM_REG s;
5498 uint32_t ea = GetEA(modrm, 0);
5499 READXMM(ea, s);
5500 for (int n=0;n < 8;n++)
5501 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((int32_t)XMM((modrm >> 3) & 0x7).s[n] - (int32_t)s.s[n]);
5502 }
5503 CYCLES(1); // TODO: correct cycle count
5504 }
5505
sse_pminsw_r128_rm128()5506 void i386_device::sse_pminsw_r128_rm128() // Opcode 66 0f ea
5507 {
5508 uint8_t modrm = FETCH();
5509 if( modrm >= 0xc0 ) {
5510 for (int n=0;n < 8;n++)
5511 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] < XMM(modrm & 0x7).s[n] ? XMM((modrm >> 3) & 0x7).s[n] : XMM(modrm & 0x7).s[n];
5512 } else {
5513 XMM_REG s;
5514 uint32_t ea = GetEA(modrm, 0);
5515 READXMM(ea, s);
5516 for (int n=0;n < 8;n++)
5517 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] < s.s[n] ? XMM((modrm >> 3) & 0x7).s[n] : s.s[n];
5518 }
5519 CYCLES(1); // TODO: correct cycle count
5520 }
5521
sse_pmaxsw_r128_rm128()5522 void i386_device::sse_pmaxsw_r128_rm128() // Opcode 66 0f ee
5523 {
5524 uint8_t modrm = FETCH();
5525 if( modrm >= 0xc0 ) {
5526 for (int n=0;n < 8;n++)
5527 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] > XMM(modrm & 0x7).s[n] ? XMM((modrm >> 3) & 0x7).s[n] : XMM(modrm & 0x7).s[n];
5528 } else {
5529 XMM_REG s;
5530 uint32_t ea = GetEA(modrm, 0);
5531 READXMM(ea, s);
5532 for (int n=0;n < 8;n++)
5533 XMM((modrm >> 3) & 0x7).s[n] = XMM((modrm >> 3) & 0x7).s[n] > s.s[n] ? XMM((modrm >> 3) & 0x7).s[n] : s.s[n];
5534 }
5535 CYCLES(1); // TODO: correct cycle count
5536 }
5537
sse_paddsb_r128_rm128()5538 void i386_device::sse_paddsb_r128_rm128() // Opcode 66 0f ec
5539 {
5540 uint8_t modrm = FETCH();
5541 if( modrm >= 0xc0 ) {
5542 for (int n=0;n < 16;n++)
5543 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((int16_t)XMM((modrm >> 3) & 0x7).c[n] + (int16_t)XMM(modrm & 7).c[n]);
5544 } else {
5545 XMM_REG s;
5546 uint32_t ea = GetEA(modrm, 0);
5547 READXMM(ea, s);
5548 for (int n=0;n < 16;n++)
5549 XMM((modrm >> 3) & 0x7).c[n]=SaturatedSignedWordToSignedByte((int16_t)XMM((modrm >> 3) & 0x7).c[n] + (int16_t)s.c[n]);
5550 }
5551 CYCLES(1); // TODO: correct cycle count
5552 }
5553
sse_paddsw_r128_rm128()5554 void i386_device::sse_paddsw_r128_rm128() // Opcode 66 0f ed
5555 {
5556 uint8_t modrm = FETCH();
5557 if( modrm >= 0xc0 ) {
5558 for (int n=0;n < 8;n++)
5559 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((int32_t)XMM((modrm >> 3) & 0x7).s[n] + (int32_t)XMM(modrm & 7).s[n]);
5560 } else {
5561 XMM_REG s;
5562 uint32_t ea = GetEA(modrm, 0);
5563 READXMM(ea, s);
5564 for (int n=0;n < 8;n++)
5565 XMM((modrm >> 3) & 0x7).s[n]=SaturatedSignedDwordToSignedWord((int32_t)XMM((modrm >> 3) & 0x7).s[n] + (int32_t)s.s[n]);
5566 }
5567 CYCLES(1); // TODO: correct cycle count
5568 }
5569
sse_por_r128_rm128()5570 void i386_device::sse_por_r128_rm128() // Opcode 66 0f eb
5571 {
5572 uint8_t modrm = FETCH();
5573 if( modrm >= 0xc0 ) {
5574 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] | XMM(modrm & 7).q[0];
5575 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] | XMM(modrm & 7).q[1];
5576 } else {
5577 XMM_REG s;
5578 uint32_t ea = GetEA(modrm, 0);
5579 READXMM(ea, s);
5580 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] | s.q[0];
5581 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] | s.q[1];
5582 }
5583 CYCLES(1); // TODO: correct cycle count
5584 }
5585
sse_pxor_r128_rm128()5586 void i386_device::sse_pxor_r128_rm128() // Opcode 66 0f ef
5587 {
5588 uint8_t modrm = FETCH();
5589 if( modrm >= 0xc0 ) {
5590 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] ^ XMM(modrm & 7).q[0];
5591 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] ^ XMM(modrm & 7).q[1];
5592 } else {
5593 XMM_REG s;
5594 uint32_t ea = GetEA(modrm, 0);
5595 READXMM(ea, s);
5596 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] ^ s.q[0];
5597 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] ^ s.q[1];
5598 }
5599 CYCLES(1); // TODO: correct cycle count
5600 }
5601
sse_pmaddwd_r128_rm128()5602 void i386_device::sse_pmaddwd_r128_rm128() // Opcode 66 0f f5
5603 {
5604 uint8_t modrm = FETCH();
5605 if( modrm >= 0xc0 ) {
5606 for (int n=0;n < 4;n++)
5607 XMM((modrm >> 3) & 0x7).i[n]=(int32_t)XMM((modrm >> 3) & 0x7).s[n]*(int32_t)XMM(modrm & 7).s[n]+
5608 (int32_t)XMM((modrm >> 3) & 0x7).s[n]*(int32_t)XMM(modrm & 7).s[n];
5609 } else {
5610 XMM_REG s;
5611 uint32_t ea = GetEA(modrm, 0);
5612 READXMM(ea, s);
5613 for (int n=0;n < 4;n++)
5614 XMM((modrm >> 3) & 0x7).i[n]=(int32_t)XMM((modrm >> 3) & 0x7).s[n]*(int32_t)s.s[n]+
5615 (int32_t)XMM((modrm >> 3) & 0x7).s[n]*(int32_t)s.s[n];
5616 }
5617 CYCLES(1); // TODO: correct cycle count
5618 }
5619
sse_psubb_r128_rm128()5620 void i386_device::sse_psubb_r128_rm128() // Opcode 66 0f f8
5621 {
5622 uint8_t modrm = FETCH();
5623 if( modrm >= 0xc0 ) {
5624 for (int n=0;n < 16;n++)
5625 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] - XMM(modrm & 7).b[n];
5626 } else {
5627 XMM_REG s;
5628 uint32_t ea = GetEA(modrm, 0);
5629 READXMM(ea, s);
5630 for (int n=0;n < 16;n++)
5631 XMM((modrm >> 3) & 0x7).b[n]=XMM((modrm >> 3) & 0x7).b[n] - s.b[n];
5632 }
5633 CYCLES(1); // TODO: correct cycle count
5634 }
5635
sse_psubw_r128_rm128()5636 void i386_device::sse_psubw_r128_rm128() // Opcode 66 0f f9
5637 {
5638 uint8_t modrm = FETCH();
5639 if( modrm >= 0xc0 ) {
5640 for (int n=0;n < 8;n++)
5641 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] - XMM(modrm & 7).w[n];
5642 } else {
5643 XMM_REG s;
5644 uint32_t ea = GetEA(modrm, 0);
5645 READXMM(ea, s);
5646 for (int n=0;n < 8;n++)
5647 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] - s.w[n];
5648 }
5649 CYCLES(1); // TODO: correct cycle count
5650 }
5651
sse_psubd_r128_rm128()5652 void i386_device::sse_psubd_r128_rm128() // Opcode 66 0f fa
5653 {
5654 uint8_t modrm = FETCH();
5655 if( modrm >= 0xc0 ) {
5656 for (int n=0;n < 4;n++)
5657 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] - XMM(modrm & 7).d[n];
5658 } else {
5659 XMM_REG s;
5660 uint32_t ea = GetEA(modrm, 0);
5661 READXMM(ea, s);
5662 for (int n=0;n < 4;n++)
5663 XMM((modrm >> 3) & 0x7).d[n]=XMM((modrm >> 3) & 0x7).d[n] - s.d[n];
5664 }
5665 CYCLES(1); // TODO: correct cycle count
5666 }
5667
sse_psadbw_r128_rm128()5668 void i386_device::sse_psadbw_r128_rm128() // Opcode 66 0f f6
5669 {
5670 int32_t temp;
5671 uint8_t modrm = FETCH();
5672 if( modrm >= 0xc0 ) {
5673 temp=0;
5674 for (int n=0;n < 8;n++)
5675 temp += abs((int32_t)XMM((modrm >> 3) & 0x7).b[n] - (int32_t)XMM(modrm & 0x7).b[n]);
5676 XMM((modrm >> 3) & 0x7).l[0]=(uint64_t)temp & 0xffff;
5677 temp=0;
5678 for (int n=8;n < 16;n++)
5679 temp += abs((int32_t)XMM((modrm >> 3) & 0x7).b[n] - (int32_t)XMM(modrm & 0x7).b[n]);
5680 XMM((modrm >> 3) & 0x7).l[1]=(uint64_t)temp & 0xffff;
5681 } else {
5682 XMM_REG s;
5683 uint32_t ea = GetEA(modrm, 0);
5684 READXMM(ea, s);
5685 temp=0;
5686 for (int n=0;n < 8;n++)
5687 temp += abs((int32_t)XMM((modrm >> 3) & 0x7).b[n] - (int32_t)s.b[n]);
5688 XMM((modrm >> 3) & 0x7).l[0]=(uint64_t)temp & 0xffff;
5689 temp=0;
5690 for (int n=8;n < 16;n++)
5691 temp += abs((int32_t)XMM((modrm >> 3) & 0x7).b[n] - (int32_t)s.b[n]);
5692 XMM((modrm >> 3) & 0x7).l[1]=(uint64_t)temp & 0xffff;
5693 }
5694 CYCLES(1); // TODO: correct cycle count
5695 }
5696
sse_pavgb_r128_rm128()5697 void i386_device::sse_pavgb_r128_rm128() // Opcode 66 0f e0
5698 {
5699 uint8_t modrm = FETCH();
5700 if( modrm >= 0xc0 ) {
5701 for (int n=0;n < 16;n++)
5702 XMM((modrm >> 3) & 0x7).b[n] = ((uint16_t)XMM((modrm >> 3) & 0x7).b[n] + (uint16_t)XMM(modrm & 0x7).b[n] + 1) >> 1;
5703 } else {
5704 XMM_REG s;
5705 uint32_t ea = GetEA(modrm, 0);
5706 READXMM(ea, s);
5707 for (int n=0;n < 16;n++)
5708 XMM((modrm >> 3) & 0x7).b[n] = ((uint16_t)XMM((modrm >> 3) & 0x7).b[n] + (uint16_t)s.b[n] + 1) >> 1;
5709 }
5710 CYCLES(1); // TODO: correct cycle count
5711 }
5712
sse_pavgw_r128_rm128()5713 void i386_device::sse_pavgw_r128_rm128() // Opcode 66 0f e3
5714 {
5715 uint8_t modrm = FETCH();
5716 if( modrm >= 0xc0 ) {
5717 for (int n=0;n < 8;n++)
5718 XMM((modrm >> 3) & 0x7).w[n] = ((uint32_t)XMM((modrm >> 3) & 0x7).w[n] + (uint32_t)XMM(modrm & 0x7).w[n] + 1) >> 1;
5719 } else {
5720 XMM_REG s;
5721 uint32_t ea = GetEA(modrm, 0);
5722 READXMM(ea, s);
5723 for (int n=0;n < 8;n++)
5724 XMM((modrm >> 3) & 0x7).w[n] = ((uint32_t)XMM((modrm >> 3) & 0x7).w[n] + (uint32_t)s.w[n] + 1) >> 1;
5725 }
5726 CYCLES(1); // TODO: correct cycle count
5727 }
5728
sse_psrlw_r128_rm128()5729 void i386_device::sse_psrlw_r128_rm128() // Opcode 66 0f d1
5730 {
5731 uint8_t modrm = FETCH();
5732 if( modrm >= 0xc0 ) {
5733 int count=(int)XMM(modrm & 7).q[0];
5734 for (int n=0; n < 8;n++)
5735 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] >> count;
5736 } else {
5737 XMM_REG src;
5738 uint32_t ea = GetEA(modrm, 0);
5739 READXMM(ea, src);
5740 int count=(int)src.q[0];
5741 for (int n=0; n < 8;n++)
5742 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] >> count;
5743 }
5744 CYCLES(1); // TODO: correct cycle count
5745 }
5746
sse_psrld_r128_rm128()5747 void i386_device::sse_psrld_r128_rm128() // Opcode 66 0f d2
5748 {
5749 uint8_t modrm = FETCH();
5750 if( modrm >= 0xc0 ) {
5751 int count=(int)XMM(modrm & 7).q[0];
5752 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] >> count;
5753 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] >> count;
5754 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] >> count;
5755 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] >> count;
5756 } else {
5757 XMM_REG src;
5758 uint32_t ea = GetEA(modrm, 0);
5759 READXMM(ea, src);
5760 int count=(int)src.q[0];
5761 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] >> count;
5762 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] >> count;
5763 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] >> count;
5764 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] >> count;
5765 }
5766 CYCLES(1); // TODO: correct cycle count
5767 }
5768
sse_psrlq_r128_rm128()5769 void i386_device::sse_psrlq_r128_rm128() // Opcode 66 0f d3
5770 {
5771 uint8_t modrm = FETCH();
5772 if( modrm >= 0xc0 ) {
5773 int count=(int)XMM(modrm & 7).q[0];
5774 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] >> count;
5775 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] >> count;
5776 } else {
5777 XMM_REG src;
5778 uint32_t ea = GetEA(modrm, 0);
5779 READXMM(ea, src);
5780 int count=(int)src.q[0];
5781 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] >> count;
5782 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] >> count;
5783 }
5784 CYCLES(1); // TODO: correct cycle count
5785 }
5786
sse_psllw_r128_rm128()5787 void i386_device::sse_psllw_r128_rm128() // Opcode 66 0f f1
5788 {
5789 uint8_t modrm = FETCH();
5790 if( modrm >= 0xc0 ) {
5791 int count=(int)XMM(modrm & 7).q[0];
5792 for (int n=0; n < 8;n++)
5793 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] << count;
5794 } else {
5795 XMM_REG s;
5796 uint32_t ea = GetEA(modrm, 0);
5797 READXMM(ea, s);
5798 int count=(int)s.q[0];
5799 for (int n=0; n < 8;n++)
5800 XMM((modrm >> 3) & 0x7).w[n]=XMM((modrm >> 3) & 0x7).w[n] << count;
5801 }
5802 CYCLES(1); // TODO: correct cycle count
5803 }
5804
sse_pslld_r128_rm128()5805 void i386_device::sse_pslld_r128_rm128() // Opcode 66 0f f2
5806 {
5807 uint8_t modrm = FETCH();
5808 if( modrm >= 0xc0 ) {
5809 int count=(int)XMM(modrm & 7).q[0];
5810 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] << count;
5811 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] << count;
5812 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] << count;
5813 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] << count;
5814 } else {
5815 XMM_REG s;
5816 uint32_t ea = GetEA(modrm, 0);
5817 READXMM(ea, s);
5818 int count=(int)s.q[0];
5819 XMM((modrm >> 3) & 0x7).d[0]=XMM((modrm >> 3) & 0x7).d[0] << count;
5820 XMM((modrm >> 3) & 0x7).d[1]=XMM((modrm >> 3) & 0x7).d[1] << count;
5821 XMM((modrm >> 3) & 0x7).d[2]=XMM((modrm >> 3) & 0x7).d[2] << count;
5822 XMM((modrm >> 3) & 0x7).d[3]=XMM((modrm >> 3) & 0x7).d[3] << count;
5823 }
5824 CYCLES(1); // TODO: correct cycle count
5825 }
5826
sse_psllq_r128_rm128()5827 void i386_device::sse_psllq_r128_rm128() // Opcode 66 0f f3
5828 {
5829 uint8_t modrm = FETCH();
5830 if( modrm >= 0xc0 ) {
5831 int count=(int)XMM(modrm & 7).q[0];
5832 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] << count;
5833 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] << count;
5834 } else {
5835 XMM_REG s;
5836 uint32_t ea = GetEA(modrm, 0);
5837 READXMM(ea, s);
5838 int count=(int)s.q[0];
5839 XMM((modrm >> 3) & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0] << count;
5840 XMM((modrm >> 3) & 0x7).q[1]=XMM((modrm >> 3) & 0x7).q[1] << count;
5841 }
5842 CYCLES(1); // TODO: correct cycle count
5843 }
5844
sse_psraw_r128_rm128()5845 void i386_device::sse_psraw_r128_rm128() // Opcode 66 0f e1
5846 {
5847 uint8_t modrm = FETCH();
5848 if( modrm >= 0xc0 ) {
5849 int count=(int)XMM(modrm & 7).q[0];
5850 for (int n=0; n < 8;n++)
5851 XMM((modrm >> 3) & 0x7).s[n]=XMM((modrm >> 3) & 0x7).s[n] >> count;
5852 } else {
5853 XMM_REG src;
5854 uint32_t ea = GetEA(modrm, 0);
5855 READXMM(ea, src);
5856 int count=(int)src.q[0];
5857 for (int n=0; n < 8;n++)
5858 XMM((modrm >> 3) & 0x7).s[n]=XMM((modrm >> 3) & 0x7).s[n] >> count;
5859 }
5860 CYCLES(1); // TODO: correct cycle count
5861 }
5862
sse_psrad_r128_rm128()5863 void i386_device::sse_psrad_r128_rm128() // Opcode 66 0f e2
5864 {
5865 uint8_t modrm = FETCH();
5866 if( modrm >= 0xc0 ) {
5867 int count=(int)XMM(modrm & 7).q[0];
5868 XMM((modrm >> 3) & 0x7).i[0]=XMM((modrm >> 3) & 0x7).i[0] >> count;
5869 XMM((modrm >> 3) & 0x7).i[1]=XMM((modrm >> 3) & 0x7).i[1] >> count;
5870 XMM((modrm >> 3) & 0x7).i[2]=XMM((modrm >> 3) & 0x7).i[2] >> count;
5871 XMM((modrm >> 3) & 0x7).i[3]=XMM((modrm >> 3) & 0x7).i[3] >> count;
5872 } else {
5873 XMM_REG src;
5874 uint32_t ea = GetEA(modrm, 0);
5875 READXMM(ea, src);
5876 int count=(int)src.q[0];
5877 XMM((modrm >> 3) & 0x7).i[0]=XMM((modrm >> 3) & 0x7).i[0] >> count;
5878 XMM((modrm >> 3) & 0x7).i[1]=XMM((modrm >> 3) & 0x7).i[1] >> count;
5879 XMM((modrm >> 3) & 0x7).i[2]=XMM((modrm >> 3) & 0x7).i[2] >> count;
5880 XMM((modrm >> 3) & 0x7).i[3]=XMM((modrm >> 3) & 0x7).i[3] >> count;
5881 }
5882 CYCLES(1); // TODO: correct cycle count
5883 }
5884
sse_movntdq_m128_r128()5885 void i386_device::sse_movntdq_m128_r128() // Opcode 66 0f e7
5886 {
5887 uint8_t modrm = FETCH();
5888 if( modrm >= 0xc0 ) {
5889 CYCLES(1); // unsupported
5890 } else {
5891 // TODO: manage the cache if present
5892 uint32_t ea = GetEA(modrm, 0);
5893 WRITEXMM(ea, XMM((modrm >> 3) & 0x7));
5894 CYCLES(1); // TODO: correct cycle count
5895 }
5896 }
5897
sse_cvttpd2dq_r128_rm128()5898 void i386_device::sse_cvttpd2dq_r128_rm128() // Opcode 66 0f e6
5899 {
5900 uint8_t modrm = FETCH();
5901 if( modrm >= 0xc0 ) {
5902 XMM((modrm >> 3) & 0x7).i[0]=(int32_t)XMM((modrm >> 3) & 0x7).f64[0];
5903 XMM((modrm >> 3) & 0x7).i[1]=(int32_t)XMM((modrm >> 3) & 0x7).f64[1];
5904 XMM((modrm >> 3) & 0x7).q[1] = 0;
5905 } else {
5906 XMM_REG src;
5907 uint32_t ea = GetEA(modrm, 0);
5908 READXMM(ea, src);
5909 XMM((modrm >> 3) & 0x7).i[0]=(int32_t)src.f64[0];
5910 XMM((modrm >> 3) & 0x7).i[1]=(int32_t)src.f64[1];
5911 XMM((modrm >> 3) & 0x7).q[1] = 0;
5912 }
5913 CYCLES(1); // TODO: correct cycle count
5914 }
5915
sse_movq_r128m64_r128()5916 void i386_device::sse_movq_r128m64_r128() // Opcode 66 0f d6
5917 {
5918 uint8_t modrm = FETCH();
5919 if( modrm >= 0xc0 ) {
5920 XMM(modrm & 0x7).q[0]=XMM((modrm >> 3) & 0x7).q[0];
5921 XMM(modrm & 0x7).q[1] = 0;
5922 } else {
5923 uint32_t ea = GetEA(modrm, 0);
5924 WRITE64(ea, XMM((modrm >> 3) & 0x7).q[0]);
5925 }
5926 CYCLES(1); // TODO: correct cycle count
5927 }
5928
sse_addsubpd_r128_rm128()5929 void i386_device::sse_addsubpd_r128_rm128() // Opcode 66 0f d0
5930 {
5931 uint8_t modrm = FETCH();
5932 if( modrm >= 0xc0 ) {
5933 int s, d;
5934 s=modrm & 0x7;
5935 d=(modrm >> 3) & 0x7;
5936 XMM(d).f64[0]=XMM(d).f64[0]-XMM(s).f64[0];
5937 XMM(d).f64[1]=XMM(d).f64[1]+XMM(s).f64[1];
5938 } else {
5939 XMM_REG src;
5940 int d;
5941 uint32_t ea = GetEA(modrm, 0);
5942 d=(modrm >> 3) & 0x7;
5943 READXMM(ea, src);
5944 XMM(d).f64[0]=XMM(d).f64[0]-src.f64[0];
5945 XMM(d).f64[1]=XMM(d).f64[1]+src.f64[1];
5946 }
5947 CYCLES(1); // TODO: correct cycle count
5948 }
5949
sse_haddpd_r128_rm128()5950 void i386_device::sse_haddpd_r128_rm128() // Opcode 66 0f 7c
5951 {
5952 uint8_t modrm = FETCH();
5953 if( modrm >= 0xc0 ) {
5954 int s, d;
5955 s=modrm & 0x7;
5956 d=(modrm >> 3) & 0x7;
5957 XMM(d).f64[0]=XMM(d).f64[0]+XMM(d).f64[1];
5958 XMM(d).f64[1]=XMM(s).f64[0]+XMM(s).f64[1];
5959 } else {
5960 XMM_REG src;
5961 int d;
5962 uint32_t ea = GetEA(modrm, 0);
5963 d=(modrm >> 3) & 0x7;
5964 READXMM(ea, src);
5965 XMM(d).f64[0]=XMM(d).f64[0]+XMM(d).f64[1];
5966 XMM(d).f64[1]=src.f64[0]+src.f64[1];
5967 }
5968 CYCLES(1); // TODO: correct cycle count
5969 }
5970
sse_hsubpd_r128_rm128()5971 void i386_device::sse_hsubpd_r128_rm128() // Opcode 66 0f 7d
5972 {
5973 uint8_t modrm = FETCH();
5974 if( modrm >= 0xc0 ) {
5975 int s, d;
5976 s=modrm & 0x7;
5977 d=(modrm >> 3) & 0x7;
5978 XMM(d).f64[0]=XMM(d).f64[0]-XMM(d).f64[1];
5979 XMM(d).f64[1]=XMM(s).f64[0]-XMM(s).f64[1];
5980 } else {
5981 XMM_REG src;
5982 int d;
5983 uint32_t ea = GetEA(modrm, 0);
5984 d=(modrm >> 3) & 0x7;
5985 READXMM(ea, src);
5986 XMM(d).f64[0]=XMM(d).f64[0]-XMM(d).f64[1];
5987 XMM(d).f64[1]=src.f64[0]-src.f64[1];
5988 }
5989 CYCLES(1); // TODO: correct cycle count
5990 }
5991
sse_sqrtpd_r128_rm128()5992 void i386_device::sse_sqrtpd_r128_rm128() // Opcode 66 0f 51
5993 {
5994 uint8_t modrm = FETCH();
5995 if( modrm >= 0xc0 ) {
5996 int s, d;
5997 s=modrm & 0x7;
5998 d=(modrm >> 3) & 0x7;
5999 XMM(d).f64[0]=sqrt(XMM(s).f64[0]);
6000 XMM(d).f64[1]=sqrt(XMM(s).f64[1]);
6001 } else {
6002 XMM_REG src;
6003 int d;
6004 uint32_t ea = GetEA(modrm, 0);
6005 d=(modrm >> 3) & 0x7;
6006 READXMM(ea, src);
6007 XMM(d).f64[0]=sqrt(src.f64[0]);
6008 XMM(d).f64[1]=sqrt(src.f64[1]);
6009 }
6010 CYCLES(1); // TODO: correct cycle count
6011 }
6012
sse_cvtpi2pd_r128_rm64()6013 void i386_device::sse_cvtpi2pd_r128_rm64() // Opcode 66 0f 2a
6014 {
6015 uint8_t modrm = FETCH();
6016 if( modrm >= 0xc0 ) {
6017 MMXPROLOG();
6018 XMM((modrm >> 3) & 0x7).f64[0] = (double)MMX(modrm & 0x7).i[0];
6019 XMM((modrm >> 3) & 0x7).f64[1] = (double)MMX(modrm & 0x7).i[1];
6020 } else {
6021 MMX_REG r;
6022 uint32_t ea = GetEA(modrm, 0);
6023 READMMX(ea, r);
6024 XMM((modrm >> 3) & 0x7).f64[0] = (double)r.i[0];
6025 XMM((modrm >> 3) & 0x7).f64[1] = (double)r.i[1];
6026 }
6027 CYCLES(1); // TODO: correct cycle count
6028 }
6029
sse_cvttpd2pi_r64_rm128()6030 void i386_device::sse_cvttpd2pi_r64_rm128() // Opcode 66 0f 2c
6031 {
6032 uint8_t modrm = FETCH();
6033 MMXPROLOG();
6034 if( modrm >= 0xc0 ) {
6035 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f64[0];
6036 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f64[1];
6037 } else {
6038 XMM_REG r;
6039 uint32_t ea = GetEA(modrm, 0);
6040 READXMM(ea, r);
6041 MMX((modrm >> 3) & 0x7).i[0] = r.f64[0];
6042 MMX((modrm >> 3) & 0x7).i[1] = r.f64[1];
6043 }
6044 CYCLES(1); // TODO: correct cycle count
6045 }
6046
sse_cvtpd2pi_r64_rm128()6047 void i386_device::sse_cvtpd2pi_r64_rm128() // Opcode 66 0f 2d
6048 {
6049 uint8_t modrm = FETCH();
6050 MMXPROLOG();
6051 if( modrm >= 0xc0 ) {
6052 MMX((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f64[0];
6053 MMX((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f64[1];
6054 } else {
6055 XMM_REG r;
6056 uint32_t ea = GetEA(modrm, 0);
6057 READXMM(ea, r);
6058 MMX((modrm >> 3) & 0x7).i[0] = r.f64[0];
6059 MMX((modrm >> 3) & 0x7).i[1] = r.f64[1];
6060 }
6061 CYCLES(1); // TODO: correct cycle count
6062 }
6063
sse_cvtpd2ps_r128_rm128()6064 void i386_device::sse_cvtpd2ps_r128_rm128() // Opcode 66 0f 5a
6065 {
6066 uint8_t modrm = FETCH();
6067 if( modrm >= 0xc0 ) {
6068 XMM((modrm >> 3) & 0x7).f[0] = (float)XMM(modrm & 0x7).f64[0];
6069 XMM((modrm >> 3) & 0x7).f[1] = (float)XMM(modrm & 0x7).f64[1];
6070 XMM((modrm >> 3) & 0x7).q[1] = 0;
6071 } else {
6072 XMM_REG r;
6073 uint32_t ea = GetEA(modrm, 0);
6074 READXMM(ea, r);
6075 XMM((modrm >> 3) & 0x7).f[0] = (float)r.f64[0];
6076 XMM((modrm >> 3) & 0x7).f[1] = (float)r.f64[1];
6077 XMM((modrm >> 3) & 0x7).q[1] = 0;
6078 }
6079 CYCLES(1); // TODO: correct cycle count
6080 }
6081
sse_cvtps2dq_r128_rm128()6082 void i386_device::sse_cvtps2dq_r128_rm128() // Opcode 66 0f 5b
6083 {
6084 uint8_t modrm = FETCH();
6085 if( modrm >= 0xc0 ) {
6086 XMM((modrm >> 3) & 0x7).i[0] = XMM(modrm & 0x7).f[0];
6087 XMM((modrm >> 3) & 0x7).i[1] = XMM(modrm & 0x7).f[1];
6088 XMM((modrm >> 3) & 0x7).i[2] = XMM(modrm & 0x7).f[2];
6089 XMM((modrm >> 3) & 0x7).i[3] = XMM(modrm & 0x7).f[3];
6090 } else {
6091 XMM_REG r;
6092 uint32_t ea = GetEA(modrm, 0);
6093 READXMM(ea, r);
6094 XMM((modrm >> 3) & 0x7).i[0] = r.f[0];
6095 XMM((modrm >> 3) & 0x7).i[1] = r.f[1];
6096 XMM((modrm >> 3) & 0x7).i[2] = r.f[2];
6097 XMM((modrm >> 3) & 0x7).i[3] = r.f[3];
6098 }
6099 CYCLES(1); // TODO: correct cycle count
6100 }
6101
sse_addpd_r128_rm128()6102 void i386_device::sse_addpd_r128_rm128() // Opcode 66 0f 58
6103 {
6104 uint8_t modrm = FETCH();
6105 if( modrm >= 0xc0 ) {
6106 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + XMM(modrm & 0x7).f64[0];
6107 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] + XMM(modrm & 0x7).f64[1];
6108 } else {
6109 XMM_REG src;
6110 uint32_t ea = GetEA(modrm, 0);
6111 READXMM(ea, src);
6112 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + src.f64[0];
6113 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] + src.f64[1];
6114 }
6115 CYCLES(1); // TODO: correct cycle count
6116 }
6117
sse_mulpd_r128_rm128()6118 void i386_device::sse_mulpd_r128_rm128() // Opcode 66 0f 59
6119 {
6120 uint8_t modrm = FETCH();
6121 if( modrm >= 0xc0 ) {
6122 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * XMM(modrm & 0x7).f64[0];
6123 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] * XMM(modrm & 0x7).f64[1];
6124 } else {
6125 XMM_REG src;
6126 uint32_t ea = GetEA(modrm, 0);
6127 READXMM(ea, src);
6128 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * src.f64[0];
6129 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] * src.f64[1];
6130 }
6131 CYCLES(1); // TODO: correct cycle count
6132 }
6133
sse_subpd_r128_rm128()6134 void i386_device::sse_subpd_r128_rm128() // Opcode 66 0f 5c
6135 {
6136 uint8_t modrm = FETCH();
6137 if( modrm >= 0xc0 ) {
6138 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - XMM(modrm & 0x7).f64[0];
6139 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] - XMM(modrm & 0x7).f64[1];
6140 } else {
6141 XMM_REG src;
6142 uint32_t ea = GetEA(modrm, 0);
6143 READXMM(ea, src);
6144 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - src.f64[0];
6145 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] - src.f64[1];
6146 }
6147 CYCLES(1); // TODO: correct cycle count
6148 }
6149
sse_minpd_r128_rm128()6150 void i386_device::sse_minpd_r128_rm128() // Opcode 66 0f 5d
6151 {
6152 uint8_t modrm = FETCH();
6153 if( modrm >= 0xc0 ) {
6154 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6155 XMM((modrm >> 3) & 0x7).f64[1] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[1], XMM(modrm & 0x7).f64[1]);
6156 } else {
6157 XMM_REG src;
6158 uint32_t ea = GetEA(modrm, 0);
6159 READXMM(ea, src);
6160 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6161 XMM((modrm >> 3) & 0x7).f64[1] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[1], src.f64[1]);
6162 }
6163 CYCLES(1); // TODO: correct cycle count
6164 }
6165
sse_divpd_r128_rm128()6166 void i386_device::sse_divpd_r128_rm128() // Opcode 66 0f 5e
6167 {
6168 uint8_t modrm = FETCH();
6169 if( modrm >= 0xc0 ) {
6170 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / XMM(modrm & 0x7).f64[0];
6171 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] / XMM(modrm & 0x7).f64[1];
6172 } else {
6173 XMM_REG src;
6174 uint32_t ea = GetEA(modrm, 0);
6175 READXMM(ea, src);
6176 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / src.f64[0];
6177 XMM((modrm >> 3) & 0x7).f64[1] = XMM((modrm >> 3) & 0x7).f64[1] / src.f64[1];
6178 }
6179 CYCLES(1); // TODO: correct cycle count
6180 }
6181
sse_maxpd_r128_rm128()6182 void i386_device::sse_maxpd_r128_rm128() // Opcode 66 0f 5f
6183 {
6184 uint8_t modrm = FETCH();
6185 if( modrm >= 0xc0 ) {
6186 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6187 XMM((modrm >> 3) & 0x7).f64[1] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[1], XMM(modrm & 0x7).f64[1]);
6188 } else {
6189 XMM_REG src;
6190 uint32_t ea = GetEA(modrm, 0);
6191 READXMM(ea, src);
6192 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6193 XMM((modrm >> 3) & 0x7).f64[1] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[1], src.f64[1]);
6194 }
6195 CYCLES(1); // TODO: correct cycle count
6196 }
6197
sse_movntpd_m128_r128()6198 void i386_device::sse_movntpd_m128_r128() // Opcode 66 0f 2b
6199 {
6200 uint8_t modrm = FETCH();
6201 if( modrm >= 0xc0 ) {
6202 // unsupported by cpu
6203 CYCLES(1); // TODO: correct cycle count
6204 } else {
6205 // TODO: manage the cache if present
6206 uint32_t ea = GetEA(modrm, 0);
6207 WRITEXMM(ea, XMM((modrm >> 3) & 0x7));
6208 CYCLES(1); // TODO: correct cycle count
6209 }
6210 }
6211
sse_movapd_r128_rm128()6212 void i386_device::sse_movapd_r128_rm128() // Opcode 66 0f 28
6213 {
6214 uint8_t modrm = FETCH();
6215 if( modrm >= 0xc0 ) {
6216 XMM((modrm >> 3) & 0x7) = XMM(modrm & 0x7);
6217 } else {
6218 uint32_t ea = GetEA(modrm, 0);
6219 READXMM(ea, XMM((modrm >> 3) & 0x7));
6220 }
6221 CYCLES(1); // TODO: correct cycle count
6222 }
6223
sse_movapd_rm128_r128()6224 void i386_device::sse_movapd_rm128_r128() // Opcode 66 0f 29
6225 {
6226 uint8_t modrm = FETCH();
6227 if( modrm >= 0xc0 ) {
6228 XMM(modrm & 0x7) = XMM((modrm >> 3) & 0x7);
6229 } else {
6230 uint32_t ea = GetEA(modrm, 0);
6231 WRITEXMM(ea, XMM((modrm >> 3) & 0x7));
6232 }
6233 CYCLES(1); // TODO: correct cycle count
6234 }
6235
sse_movsd_r128_r128m64()6236 void i386_device::sse_movsd_r128_r128m64() // Opcode f2 0f 10
6237 {
6238 uint8_t modrm = FETCH();
6239 if( modrm >= 0xc0 ) {
6240 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
6241 } else {
6242 uint32_t ea = GetEA(modrm, 0);
6243 READXMM_LO64(ea, XMM((modrm >> 3) & 0x7));
6244 XMM((modrm >> 3) & 0x7).q[1] = 0;
6245 }
6246 CYCLES(1); // TODO: correct cycle count
6247 }
6248
sse_movsd_r128m64_r128()6249 void i386_device::sse_movsd_r128m64_r128() // Opcode f2 0f 11
6250 {
6251 uint8_t modrm = FETCH();
6252 if( modrm >= 0xc0 ) {
6253 XMM(modrm & 0x7).q[0] = XMM((modrm >> 3) & 0x7).q[0];
6254 } else {
6255 uint32_t ea = GetEA(modrm, 0);
6256 WRITEXMM_LO64(ea, XMM((modrm >> 3) & 0x7));
6257 }
6258 CYCLES(1); // TODO: correct cycle count
6259 }
6260
sse_movddup_r128_r128m64()6261 void i386_device::sse_movddup_r128_r128m64() // Opcode f2 0f 12
6262 {
6263 uint8_t modrm = FETCH();
6264 if( modrm >= 0xc0 ) {
6265 XMM((modrm >> 3) & 0x7).q[0] = XMM(modrm & 0x7).q[0];
6266 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[0];
6267 } else {
6268 uint32_t ea = GetEA(modrm, 0);
6269 READXMM_LO64(ea, XMM((modrm >> 3) & 0x7));
6270 XMM((modrm >> 3) & 0x7).q[1] = XMM((modrm >> 3) & 0x7).q[0];
6271 }
6272 CYCLES(1); // TODO: correct cycle count
6273 }
6274
sse_cvtsi2sd_r128_rm32()6275 void i386_device::sse_cvtsi2sd_r128_rm32() // Opcode f2 0f 2a
6276 {
6277 uint8_t modrm = FETCH();
6278 if( modrm >= 0xc0 ) {
6279 XMM((modrm >> 3) & 0x7).f64[0] = (int32_t)LOAD_RM32(modrm);
6280 } else {
6281 uint32_t ea = GetEA(modrm, 0);
6282 XMM((modrm >> 3) & 0x7).f64[0] = (int32_t)READ32(ea);
6283 }
6284 CYCLES(1); // TODO: correct cycle count
6285 }
6286
sse_cvttsd2si_r32_r128m64()6287 void i386_device::sse_cvttsd2si_r32_r128m64() // Opcode f2 0f 2c
6288 {
6289 int32_t src;
6290 uint8_t modrm = FETCH();
6291 if( modrm >= 0xc0 ) {
6292 src = (int32_t)XMM(modrm & 0x7).f64[0];
6293 } else { // otherwise is a memory address
6294 XMM_REG t;
6295 uint32_t ea = GetEA(modrm, 0);
6296 READXMM_LO64(ea, t);
6297 src = (int32_t)t.f64[0];
6298 }
6299 STORE_REG32(modrm, (uint32_t)src);
6300 CYCLES(1); // TODO: correct cycle count
6301 }
6302
sse_cvtsd2si_r32_r128m64()6303 void i386_device::sse_cvtsd2si_r32_r128m64() // Opcode f2 0f 2d
6304 {
6305 int32_t src;
6306 uint8_t modrm = FETCH();
6307 if( modrm >= 0xc0 ) {
6308 src = (int32_t)XMM(modrm & 0x7).f64[0];
6309 } else { // otherwise is a memory address
6310 XMM_REG t;
6311 uint32_t ea = GetEA(modrm, 0);
6312 READXMM_LO64(ea, t);
6313 src = (int32_t)t.f64[0];
6314 }
6315 STORE_REG32(modrm, (uint32_t)src);
6316 CYCLES(1); // TODO: correct cycle count
6317 }
6318
sse_sqrtsd_r128_r128m64()6319 void i386_device::sse_sqrtsd_r128_r128m64() // Opcode f2 0f 51
6320 {
6321 uint8_t modrm = FETCH();
6322 if( modrm >= 0xc0 ) {
6323 int s, d;
6324 s=modrm & 0x7;
6325 d=(modrm >> 3) & 0x7;
6326 XMM(d).f64[0]=sqrt(XMM(s).f64[0]);
6327 } else {
6328 XMM_REG src;
6329 int d;
6330 uint32_t ea = GetEA(modrm, 0);
6331 d=(modrm >> 3) & 0x7;
6332 READXMM(ea, src);
6333 XMM(d).f64[0]=sqrt(src.f64[0]);
6334 }
6335 CYCLES(1); // TODO: correct cycle count
6336 }
6337
sse_addsd_r128_r128m64()6338 void i386_device::sse_addsd_r128_r128m64() // Opcode f2 0f 58
6339 {
6340 uint8_t modrm = FETCH();
6341 if( modrm >= 0xc0 ) {
6342 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + XMM(modrm & 0x7).f64[0];
6343 } else {
6344 XMM_REG src;
6345 uint32_t ea = GetEA(modrm, 0);
6346 READXMM(ea, src);
6347 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] + src.f64[0];
6348 }
6349 CYCLES(1); // TODO: correct cycle count
6350 }
6351
sse_mulsd_r128_r128m64()6352 void i386_device::sse_mulsd_r128_r128m64() // Opcode f2 0f 59
6353 {
6354 uint8_t modrm = FETCH();
6355 if( modrm >= 0xc0 ) {
6356 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * XMM(modrm & 0x7).f64[0];
6357 } else {
6358 XMM_REG src;
6359 uint32_t ea = GetEA(modrm, 0);
6360 READXMM(ea, src);
6361 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] * src.f64[0];
6362 }
6363 CYCLES(1); // TODO: correct cycle count
6364 }
6365
sse_cvtsd2ss_r128_r128m64()6366 void i386_device::sse_cvtsd2ss_r128_r128m64() // Opcode f2 0f 5a
6367 {
6368 uint8_t modrm = FETCH();
6369 if( modrm >= 0xc0 ) {
6370 XMM((modrm >> 3) & 0x7).f[0] = XMM(modrm & 0x7).f64[0];
6371 } else {
6372 XMM_REG s;
6373 uint32_t ea = GetEA(modrm, 0);
6374 READXMM_LO64(ea, s);
6375 XMM((modrm >> 3) & 0x7).f[0] = s.f64[0];
6376 }
6377 CYCLES(1); // TODO: correct cycle count
6378 }
6379
sse_subsd_r128_r128m64()6380 void i386_device::sse_subsd_r128_r128m64() // Opcode f2 0f 5c
6381 {
6382 uint8_t modrm = FETCH();
6383 if( modrm >= 0xc0 ) {
6384 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - XMM(modrm & 0x7).f64[0];
6385 } else {
6386 XMM_REG src;
6387 uint32_t ea = GetEA(modrm, 0);
6388 READXMM(ea, src);
6389 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] - src.f64[0];
6390 }
6391 CYCLES(1); // TODO: correct cycle count
6392 }
6393
sse_minsd_r128_r128m64()6394 void i386_device::sse_minsd_r128_r128m64() // Opcode f2 0f 5d
6395 {
6396 uint8_t modrm = FETCH();
6397 if( modrm >= 0xc0 ) {
6398 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6399 } else {
6400 XMM_REG src;
6401 uint32_t ea = GetEA(modrm, 0);
6402 READXMM(ea, src);
6403 XMM((modrm >> 3) & 0x7).f64[0] = sse_min_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6404 }
6405 CYCLES(1); // TODO: correct cycle count
6406 }
6407
sse_divsd_r128_r128m64()6408 void i386_device::sse_divsd_r128_r128m64() // Opcode f2 0f 5e
6409 {
6410 uint8_t modrm = FETCH();
6411 if( modrm >= 0xc0 ) {
6412 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / XMM(modrm & 0x7).f64[0];
6413 } else {
6414 XMM_REG src;
6415 uint32_t ea = GetEA(modrm, 0);
6416 READXMM(ea, src);
6417 XMM((modrm >> 3) & 0x7).f64[0] = XMM((modrm >> 3) & 0x7).f64[0] / src.f64[0];
6418 }
6419 CYCLES(1); // TODO: correct cycle count
6420 }
6421
sse_maxsd_r128_r128m64()6422 void i386_device::sse_maxsd_r128_r128m64() // Opcode f2 0f 5f
6423 {
6424 uint8_t modrm = FETCH();
6425 if( modrm >= 0xc0 ) {
6426 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], XMM(modrm & 0x7).f64[0]);
6427 } else {
6428 XMM_REG src;
6429 uint32_t ea = GetEA(modrm, 0);
6430 READXMM(ea, src);
6431 XMM((modrm >> 3) & 0x7).f64[0] = sse_max_double(XMM((modrm >> 3) & 0x7).f64[0], src.f64[0]);
6432 }
6433 CYCLES(1); // TODO: correct cycle count
6434 }
6435
sse_haddps_r128_rm128()6436 void i386_device::sse_haddps_r128_rm128() // Opcode f2 0f 7c
6437 {
6438 uint8_t modrm = FETCH();
6439 if( modrm >= 0xc0 ) {
6440 int s, d;
6441 float f1, f2, f3, f4;
6442 s=modrm & 0x7;
6443 d=(modrm >> 3) & 0x7;
6444 f1=XMM(d).f[0]+XMM(d).f[1];
6445 f2=XMM(d).f[2]+XMM(d).f[3];
6446 f3=XMM(s).f[0]+XMM(s).f[1];
6447 f4=XMM(s).f[2]+XMM(s).f[3];
6448 XMM(d).f[0]=f1;
6449 XMM(d).f[1]=f2;
6450 XMM(d).f[2]=f3;
6451 XMM(d).f[3]=f4;
6452 } else {
6453 XMM_REG src;
6454 int d;
6455 float f1, f2;
6456 uint32_t ea = GetEA(modrm, 0);
6457 d=(modrm >> 3) & 0x7;
6458 READXMM(ea, src);
6459 f1=XMM(d).f[0]+XMM(d).f[1];
6460 f2=XMM(d).f[2]+XMM(d).f[3];
6461 XMM(d).f[0]=f1;
6462 XMM(d).f[1]=f2;
6463 XMM(d).f[2]=src.f[0]+src.f[1];
6464 XMM(d).f[3]=src.f[2]+src.f[3];
6465 }
6466 CYCLES(1); // TODO: correct cycle count
6467 }
6468
sse_hsubps_r128_rm128()6469 void i386_device::sse_hsubps_r128_rm128() // Opcode f2 0f 7d
6470 {
6471 uint8_t modrm = FETCH();
6472 if( modrm >= 0xc0 ) {
6473 int s, d;
6474 float f1, f2, f3, f4;
6475 s=modrm & 0x7;
6476 d=(modrm >> 3) & 0x7;
6477 f1=XMM(d).f[0]-XMM(d).f[1];
6478 f2=XMM(d).f[2]-XMM(d).f[3];
6479 f3=XMM(s).f[0]-XMM(s).f[1];
6480 f4=XMM(s).f[2]-XMM(s).f[3];
6481 XMM(d).f[0]=f1;
6482 XMM(d).f[1]=f2;
6483 XMM(d).f[2]=f3;
6484 XMM(d).f[3]=f4;
6485 } else {
6486 XMM_REG src;
6487 int d;
6488 float f1, f2;
6489 uint32_t ea = GetEA(modrm, 0);
6490 d=(modrm >> 3) & 0x7;
6491 READXMM(ea, src);
6492 f1=XMM(d).f[0]-XMM(d).f[1];
6493 f2=XMM(d).f[2]-XMM(d).f[3];
6494 XMM(d).f[0]=f1;
6495 XMM(d).f[1]=f2;
6496 XMM(d).f[2]=src.f[0]-src.f[1];
6497 XMM(d).f[3]=src.f[2]-src.f[3];
6498 }
6499 CYCLES(1); // TODO: correct cycle count
6500 }
6501
sse_cmpsd_r128_r128m64_i8()6502 void i386_device::sse_cmpsd_r128_r128m64_i8() // Opcode f2 0f c2
6503 {
6504 uint8_t modrm = FETCH();
6505 if( modrm >= 0xc0 ) {
6506 int s,d;
6507 uint8_t imm8 = FETCH();
6508 s=modrm & 0x7;
6509 d=(modrm >> 3) & 0x7;
6510 sse_predicate_compare_double_scalar(imm8, XMM(d), XMM(s));
6511 } else {
6512 int d;
6513 XMM_REG s;
6514 uint32_t ea = GetEA(modrm, 0);
6515 uint8_t imm8 = FETCH();
6516 READXMM_LO64(ea, s);
6517 d=(modrm >> 3) & 0x7;
6518 sse_predicate_compare_double_scalar(imm8, XMM(d), s);
6519 }
6520 CYCLES(1); // TODO: correct cycle count
6521 }
6522
sse_addsubps_r128_rm128()6523 void i386_device::sse_addsubps_r128_rm128() // Opcode f2 0f d0
6524 {
6525 uint8_t modrm = FETCH();
6526 if( modrm >= 0xc0 ) {
6527 XMM((modrm >> 3) & 0x7).f[0]=XMM((modrm >> 3) & 0x7).f[0] - XMM(modrm & 0x7).f[0];
6528 XMM((modrm >> 3) & 0x7).f[1]=XMM((modrm >> 3) & 0x7).f[1] + XMM(modrm & 0x7).f[1];
6529 XMM((modrm >> 3) & 0x7).f[2]=XMM((modrm >> 3) & 0x7).f[2] - XMM(modrm & 0x7).f[2];
6530 XMM((modrm >> 3) & 0x7).f[3]=XMM((modrm >> 3) & 0x7).f[3] + XMM(modrm & 0x7).f[3];
6531 } else {
6532 XMM_REG src;
6533 uint32_t ea = GetEA(modrm, 0);
6534 READXMM(ea, src);
6535 XMM((modrm >> 3) & 0x7).f[0]=XMM((modrm >> 3) & 0x7).f[0] - src.f[0];
6536 XMM((modrm >> 3) & 0x7).f[1]=XMM((modrm >> 3) & 0x7).f[1] + src.f[1];
6537 XMM((modrm >> 3) & 0x7).f[2]=XMM((modrm >> 3) & 0x7).f[2] - src.f[2];
6538 XMM((modrm >> 3) & 0x7).f[3]=XMM((modrm >> 3) & 0x7).f[3] + src.f[3];
6539 }
6540 CYCLES(1); // TODO: correct cycle count
6541 }
6542
sse_movdq2q_r64_r128()6543 void i386_device::sse_movdq2q_r64_r128() // Opcode f2 0f d6
6544 {
6545 uint8_t modrm = FETCH();
6546 MMXPROLOG();
6547 if( modrm >= 0xc0 ) {
6548 MMX((modrm >> 3) & 0x7).q = XMM(modrm & 0x7).q[0];
6549 CYCLES(1); // TODO: correct cycle count
6550 } else {
6551 // unsupported by cpu
6552 CYCLES(1); // TODO: correct cycle count
6553 }
6554 }
6555
sse_cvtpd2dq_r128_rm128()6556 void i386_device::sse_cvtpd2dq_r128_rm128() // Opcode f2 0f e6
6557 {
6558 uint8_t modrm = FETCH();
6559 if( modrm >= 0xc0 ) {
6560 XMM((modrm >> 3) & 0x7).i[0]=(int32_t)XMM((modrm >> 3) & 0x7).f64[0];
6561 XMM((modrm >> 3) & 0x7).i[1]=(int32_t)XMM((modrm >> 3) & 0x7).f64[1];
6562 XMM((modrm >> 3) & 0x7).q[1] = 0;
6563 } else {
6564 XMM_REG src;
6565 uint32_t ea = GetEA(modrm, 0);
6566 READXMM(ea, src);
6567 XMM((modrm >> 3) & 0x7).i[0]=(int32_t)src.f64[0];
6568 XMM((modrm >> 3) & 0x7).i[1]=(int32_t)src.f64[1];
6569 XMM((modrm >> 3) & 0x7).q[1] = 0;
6570 }
6571 CYCLES(1); // TODO: correct cycle count
6572 }
6573
sse_lddqu_r128_m128()6574 void i386_device::sse_lddqu_r128_m128() // Opcode f2 0f f0
6575 {
6576 uint8_t modrm = FETCH();
6577 if( modrm >= 0xc0 ) {
6578 // unsupported by cpu
6579 CYCLES(1); // TODO: correct cycle count
6580 } else {
6581 uint32_t ea = GetEA(modrm, 0);
6582 READXMM(ea, XMM((modrm >> 3) & 0x7));
6583 }
6584 }
6585