1 /* rx.c --- opcode semantics for stand-alone RX simulator.
2 
3 Copyright (C) 2008-2013 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
5 
6 This file is part of the GNU simulators.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <signal.h>
26 
27 #include "opcode/rx.h"
28 #include "cpu.h"
29 #include "mem.h"
30 #include "syscalls.h"
31 #include "fpu.h"
32 #include "err.h"
33 #include "misc.h"
34 
35 #ifdef CYCLE_STATS
/* Printable names of the opcode ids, used only for CYCLE_STATS
   reporting.  The table is indexed by opcode id, so the order here is
   presumably the order of the RXO_* enumeration in opcode/rx.h --
   keep the two in step.  Both the strings and the pointer array are
   read-only.  */
static const char * const id_names[] = {
  "RXO_unknown",
  "RXO_mov",	/* d = s (signed) */
  "RXO_movbi",	/* d = [s,s2] (signed) */
  "RXO_movbir",	/* [s,s2] = d (signed) */
  "RXO_pushm",	/* s..s2 */
  "RXO_popm",	/* s..s2 */
  "RXO_xchg",	/* s <-> d */
  "RXO_stcc",	/* d = s if cond(s2) */
  "RXO_rtsd",	/* rtsd, 1=imm, 2-0 = reg if reg type */

  /* These are all either d OP= s or, if s2 is set, d = s OP s2.  Note
     that d may be "None".  */
  "RXO_and",
  "RXO_or",
  "RXO_xor",
  "RXO_add",
  "RXO_sub",
  "RXO_mul",
  "RXO_div",
  "RXO_divu",
  "RXO_shll",
  "RXO_shar",
  "RXO_shlr",

  "RXO_adc",	/* d = d + s + carry */
  "RXO_sbb",	/* d = d - s - ~carry */
  "RXO_abs",	/* d = |s| */
  "RXO_max",	/* d = max(d,s) */
  "RXO_min",	/* d = min(d,s) */
  "RXO_emul",	/* d:64 = d:32 * s */
  "RXO_emulu",	/* d:64 = d:32 * s (unsigned) */

  "RXO_rolc",	/* d <<= 1 through carry */
  "RXO_rorc",	/* d >>= 1 through carry */
  "RXO_rotl",	/* d <<= #s without carry */
  "RXO_rotr",	/* d >>= #s without carry */
  "RXO_revw",	/* d = revw(s) */
  "RXO_revl",	/* d = revl(s) */
  "RXO_branch",	/* pc = d if cond(s) */
  "RXO_branchrel",/* pc += d if cond(s) */
  "RXO_jsr",	/* pc = d */
  "RXO_jsrrel",	/* pc += d */
  "RXO_rts",
  "RXO_nop",
  "RXO_nop2",
  "RXO_nop3",

  "RXO_scmpu",
  "RXO_smovu",
  "RXO_smovb",
  "RXO_suntil",
  "RXO_swhile",
  "RXO_smovf",
  "RXO_sstr",

  "RXO_rmpa",
  "RXO_mulhi",
  "RXO_mullo",
  "RXO_machi",
  "RXO_maclo",
  "RXO_mvtachi",
  "RXO_mvtaclo",
  "RXO_mvfachi",
  "RXO_mvfacmi",
  "RXO_mvfaclo",
  "RXO_racw",

  "RXO_sat",	/* sat(d) */
  "RXO_satr",

  "RXO_fadd",	/* d op= s */
  "RXO_fcmp",
  "RXO_fsub",
  "RXO_ftoi",
  "RXO_fmul",
  "RXO_fdiv",
  "RXO_round",
  "RXO_itof",

  "RXO_bset",	/* d |= (1<<s) */
  "RXO_bclr",	/* d &= ~(1<<s) */
  "RXO_btst",	/* s & (1<<s2) */
  "RXO_bnot",	/* d ^= (1<<s) */
  "RXO_bmcc",	/* d<s> = cond(s2) */

  "RXO_clrpsw",	/* flag index in d */
  "RXO_setpsw",	/* flag index in d */
  "RXO_mvtipl",	/* new IPL in s */

  "RXO_rtfi",
  "RXO_rte",
  "RXO_rtd",	/* undocumented */
  "RXO_brk",
  "RXO_dbt",	/* undocumented */
  "RXO_int",	/* vector id in s */
  "RXO_stop",
  "RXO_wait",

  "RXO_sccnd",	/* d = cond(s) ? 1 : 0 */
};
137 
/* Fixed-width (four character) printable names of the operand types,
   used only for CYCLE_STATS reporting.  Indexed by operand type; the
   order presumably follows the RX_Operand_* enumeration in
   opcode/rx.h.  Both the strings and the pointer array are
   read-only.  */
static const char * const optype_names[] = {
  " -  ",
  "#Imm",	/* #addend */
  " Rn ",	/* Rn */
  "[Rn]",	/* [Rn + addend] */
  "Ps++",	/* [Rn+] */
  "--Pr",	/* [-Rn] */
  " cc ",	/* eq, gtu, etc */
  "Flag",	/* [UIOSZC] */
  "RbRi"	/* [Rb + scale * Ri] */
};
149 
/* N_RXO and N_RXT are the element counts of id_names and
   optype_names; N_MAP bounds the number of distinct index values
   that op_lookup may hand out.  */
#define N_RXO (sizeof(id_names)/sizeof(id_names[0]))
#define N_RXT (sizeof(optype_names)/sizeof(optype_names[0]))
#define N_MAP 30

/* NOTE(review): not referenced in the visible part of the file;
   presumably bracket a benchmark region -- confirm against users.  */
static unsigned long long benchmark_start_cycle;
static unsigned long long benchmark_end_cycle;

/* op_cache maps a triple of indices (each below N_RXT) to a small
   non-zero index assigned by op_lookup; zero means "not assigned
   yet".  op_cache_rev maps that index back to the packed triple, and
   op_cache_idx is the most recently assigned index.  */
static int op_cache[N_RXT][N_RXT][N_RXT];
static int op_cache_rev[N_MAP];
static int op_cache_idx = 0;
160 
161 static int
op_lookup(int a,int b,int c)162 op_lookup (int a, int b, int c)
163 {
164   if (op_cache[a][b][c])
165     return op_cache[a][b][c];
166   op_cache_idx ++;
167   if (op_cache_idx >= N_MAP)
168     {
169       printf("op_cache_idx exceeds %d\n", N_MAP);
170       exit(1);
171     }
172   op_cache[a][b][c] = op_cache_idx;
173   op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
174   return op_cache_idx;
175 }
176 
177 static char *
op_cache_string(int map)178 op_cache_string (int map)
179 {
180   static int ci;
181   static char cb[5][20];
182   int a, b, c;
183 
184   map = op_cache_rev[map];
185   a = (map >> 8) & 15;
186   b = (map >> 4) & 15;
187   c = (map >> 0) & 15;
188   ci = (ci + 1) % 5;
189   sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]);
190   return cb[ci];
191 }
192 
/* Counters collected when CYCLE_STATS is defined.  The per-id arrays
   are sized by opcode id (N_RXO) and operand-map index (N_MAP); they
   are updated outside the part of the file visible here
   (NOTE(review): confirm exact meaning against the reporting code).  */
static unsigned long long cycles_per_id[N_RXO][N_MAP];
static unsigned long long times_per_id[N_RXO][N_MAP];
/* Incremented by get_op/put_op when regs.m2m == M2M_BOTH.  */
static unsigned long long memory_stalls;
/* Incremented by the RL macro when a register load stall is charged.  */
static unsigned long long register_stalls;
/* Incremented by decode_opcode for each taken branch.  */
static unsigned long long branch_stalls;
/* Incremented by decode_opcode when the branch alignment penalty is
   charged.  */
static unsigned long long branch_alignment_stalls;
static unsigned long long fast_returns;

static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
static int prev_opcode_id = RXO_unknown;
static int po0;
204 
205 #define STATS(x) x
206 
207 #else
208 #define STATS(x)
209 #endif /* CYCLE_STATS */
210 
211 
#ifdef CYCLE_ACCURATE

/* Register recorded by the RLD macro: the register most recently
   written by an instruction that had a memory source, or -1.  */
static int new_rt = -1;

/* Number of cycles to add if an insn spans an 8-byte boundary.  */
static int branch_alignment_penalty = 0;

#endif

/* Tracing via tprintf is suppressed while this is zero.  */
static int running_benchmark = 1;

/* Conditional trace output.  Note that this expands to a bare "if"
   followed by the printf call, so a tprintf statement must not be
   immediately followed by an "else" belonging to an outer "if".  */
#define tprintf if (trace && running_benchmark) printf

/* Target of the longjmp calls made by DO_RETURN and
   generate_access_exception.  */
jmp_buf decode_jmp_buf;
/* Incremented once per call to decode_opcode.  */
unsigned int rx_cycles = 0;
227 
228 #ifdef CYCLE_ACCURATE
229 /* If nonzero, memory was read at some point and cycle latency might
230    take effect.  */
231 static int memory_source = 0;
232 /* If nonzero, memory was written and extra cycles might be
233    needed.  */
234 static int memory_dest = 0;
235 
236 static void
cycles(int throughput)237 cycles (int throughput)
238 {
239   tprintf("%d cycles\n", throughput);
240   regs.cycle_count += throughput;
241 }
242 
/* Number of execution (E) cycles the op uses.  For memory sources, we
   include the load micro-op stall as two extra E cycles.
   NOTE(review): the argument is not parenthesised in the expansion,
   so pass only simple expressions.  */
#define E(c) cycles (memory_source ? c + 2 : c)
/* Fixed one- and two-cycle charges, regardless of memory_source.  */
#define E1 cycles (1)
#define E2 cycles (2)
/* Bit operations: one cycle, or two when a memory operand was read.  */
#define EBIT cycles (memory_source ? 2 : 1)

/* Check to see if a read latency must be applied for a given register.
   If R is the register recorded in regs.rt, charge one stall cycle and
   clear regs.rt.  Expands to a bare "if" statement, so do not follow
   a use with "else".  */
#define RL(r) \
  if (regs.rt == r )							\
    {									\
      tprintf("register %d load stall\n", r);				\
      regs.cycle_count ++;						\
      STATS(register_stalls ++);					\
      regs.rt = -1;							\
    }

/* Record R in new_rt when the current instruction read memory, i.e.
   R is being loaded from memory.  Also a bare "if" statement.  */
#define RLD(r)					\
  if (memory_source)				\
    {						\
      tprintf ("Rt now %d\n", r);		\
      new_rt = r;				\
    }
266 
/* Return a measure of the magnitude of the 32-bit value V, used to
   estimate divide timing: 0 when V is zero, otherwise (i + 2) / 2
   where i is the index of the highest set bit among bits 0..31.  In
   other words the result n covers values that are 2n-1 or 2n bits
   long.

   If IS_SIGNED is nonzero and bit 31 of V is set, V is negated first
   so that the magnitude is measured.

   The bit test uses an unsigned long constant: shifting the int
   constant 1 left by 31 would be undefined behaviour and, where it
   yields INT_MIN, would sign-extend into the upper bits of the mask
   when long is wider than int.  */
static int
lsb_count (unsigned long v, int is_signed)
{
  int i;

  if (is_signed && (v & 0x80000000U))
    v = (unsigned long)(long)(-v);
  for (i = 31; i >= 0; i--)
    if (v & (1UL << i))
      {
	/* i is 0..31, we want 1=1-2, 2=3-4, 3=5-6, etc. */
	return (i + 2) / 2;
      }
  return 0;
}
282 
/* Charge the cycles for an unsigned divide of NUM by DEN and return
   the base cycle count: 2 when the numerator's lsb_count is smaller
   than the denominator's, otherwise 3 plus the difference.  The E
   macro may add a memory-source surcharge to what is actually
   charged.  */
static int
divu_cycles(unsigned long num, unsigned long den)
{
  int num_size = lsb_count (num, 0);
  int den_size = lsb_count (den, 0);
  int cost = (num_size < den_size) ? 2 : 3 + num_size - den_size;

  E (cost);
  return cost;
}
297 
/* Charge the cycles for a signed divide of NUM by DEN and return the
   base cycle count: 3 when the numerator's magnitude (per lsb_count)
   is smaller than the denominator's, otherwise 5 plus the difference.
   The E macro may add a memory-source surcharge to what is actually
   charged.  */
static int
div_cycles(long num, long den)
{
  int num_size = lsb_count ((unsigned long)num, 1);
  int den_size = lsb_count ((unsigned long)den, 1);
  int cost = (num_size < den_size) ? 3 : 5 + num_size - den_size;

  E (cost);
  return cost;
}
312 
313 #else /* !CYCLE_ACCURATE */
314 
315 #define cycles(t)
316 #define E(c)
317 #define E1
318 #define E2
319 #define EBIT
320 #define RL(r)
321 #define RLD(r)
322 
323 #define divu_cycles(n,d)
324 #define div_cycles(n,d)
325 
326 #endif /* else CYCLE_ACCURATE */
327 
/* Width in bytes of an operand, indexed by its size code.  Judging by
   the switch statements in get_op and put_op the index order is
   AnySize, Byte, UByte, SByte, Word, UWord, SWord, 3Byte, Long.  The
   table is never written, so it is const.  */
static const int size2bytes[] = {
  4, 1, 1, 1, 2, 2, 2, 3, 4
};
331 
/* State passed to the byte-fetch callback (rx_get_byte) while an
   instruction is decoded: dpc is the address of the next byte to
   fetch and is advanced by one on every fetch.  */
typedef struct {
  unsigned long dpc;
} RX_Data;
335 
/* Report the source location of an internal failure and abort.  Use
   the rx_abort () macro, which supplies the location.  */
#define rx_abort() _rx_abort(__FILE__, __LINE__)
static void
_rx_abort (const char *file, int line)
{
  /* Print only the last path component of FILE.  */
  const char *slash = strrchr (file, '/');
  const char *base = slash ? slash + 1 : file;

  fprintf(stderr, "abort at %s:%d\n", base, line);
  abort();
}
345 
/* Cached view of the memory page most recently used for instruction
   fetch, maintained by maybe_get_mem_page.  get_byte_page is the page
   address (the fetch address masked with NONPAGE_MASK).  The two base
   pointers have the page address subtracted from them so that they
   can be indexed directly by a full address within that page.  */
static unsigned char *get_byte_base;
static RX_Opcode_Decoded **decode_cache_base;
static SI get_byte_page;
349 
350 void
reset_decoder(void)351 reset_decoder (void)
352 {
353   get_byte_base = 0;
354   decode_cache_base = 0;
355   get_byte_page = 0;
356 }
357 
358 static inline void
maybe_get_mem_page(SI tpc)359 maybe_get_mem_page (SI tpc)
360 {
361   if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting)
362     {
363       get_byte_page = tpc & NONPAGE_MASK;
364       get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
365       decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;
366     }
367 }
368 
369 /* This gets called a *lot* so optimize it.  */
370 static int
rx_get_byte(void * vdata)371 rx_get_byte (void *vdata)
372 {
373   RX_Data *rx_data = (RX_Data *)vdata;
374   SI tpc = rx_data->dpc;
375 
376   /* See load.c for an explanation of this.  */
377   if (rx_big_endian)
378     tpc ^= 3;
379 
380   maybe_get_mem_page (tpc);
381 
382   rx_data->dpc ++;
383   return get_byte_base [tpc];
384 }
385 
/* Return the value of operand I of the decoded instruction RD.

   Immediate operands yield their addend, condition operands yield
   the result of condition_true, and flag operands yield 0 or 1 for
   the selected PSW bit; these three return directly.  Register and
   memory operands are read and then clipped or extended according to
   the operand's size code.  Pre-decrement and post-increment operands
   adjust the address register by the operand width as a side effect.

   Aborts (via rx_abort) for an operand of type None, or for a
   register or memory operand whose size is RX_AnySize.  */
static int
get_op (const RX_Opcode_Decoded *rd, int i)
{
  const RX_Opcode_Operand *o = rd->op + i;
  int addr, rv = 0;

  switch (o->type)
    {
    case RX_Operand_None:
      /* rx_abort does not return, so there is no fall through.  */
      rx_abort ();

    case RX_Operand_Immediate:	/* #addend */
      return o->addend;

    case RX_Operand_Register:	/* Rn */
      RL (o->reg);
      rv = get_reg (o->reg);
      break;

    case RX_Operand_Predec:	/* [-Rn] */
      put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
      /* fall through */
    case RX_Operand_Postinc:	/* [Rn+] */
    case RX_Operand_Indirect:	/* [Rn + addend] */
    case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
#ifdef CYCLE_ACCURATE
      /* Charge load stalls for the register(s) forming the address,
	 then an extra cycle if regs.m2m is M2M_BOTH.  */
      RL (o->reg);
      if (o->type == RX_Operand_TwoReg)
	RL (rd->op[2].reg);
      regs.rt = -1;
      if (regs.m2m == M2M_BOTH)
	{
	  tprintf("src memory stall\n");
#ifdef CYCLE_STATS
	  memory_stalls ++;
#endif
	  regs.cycle_count ++;
	  regs.m2m = 0;
	}

      /* Remembered so that E/EBIT/RLD know this insn read memory.  */
      memory_source = 1;
#endif

      /* For the two-register form the operand's own register is
	 scaled by the instruction's size and operand 2's register is
	 added to it.  */
      if (o->type == RX_Operand_TwoReg)
	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
      else
	addr = get_reg (o->reg) + o->addend;

      switch (o->size)
	{
	case RX_AnySize:
	  rx_abort ();

	case RX_Byte: /* undefined extension */
	case RX_UByte:
	case RX_SByte:
	  rv = mem_get_qi (addr);
	  break;

	case RX_Word: /* undefined extension */
	case RX_UWord:
	case RX_SWord:
	  rv = mem_get_hi (addr);
	  break;

	case RX_3Byte:
	  rv = mem_get_psi (addr);
	  break;

	case RX_Long:
	  rv = mem_get_si (addr);
	  break;
	}

      if (o->type == RX_Operand_Postinc)
	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);

      break;

    case RX_Operand_Condition:	/* eq, gtu, etc */
      return condition_true (o->reg);

    case RX_Operand_Flag:	/* [UIOSZC] */
      return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
    }

  /* if we've gotten here, we need to clip/extend the value according
     to the size.  */
  switch (o->size)
    {
    case RX_AnySize:
      rx_abort ();

    case RX_Byte: /* undefined extension */
      /* Deliberately pollute the upper bits so that code relying on
	 an unspecified extension is noticed.  */
      rv |= 0xdeadbe00; /* keep them honest */
      break;

    case RX_UByte:
      rv &= 0xff;
      break;

    case RX_SByte:
      rv = sign_ext (rv, 8);
      break;

    case RX_Word: /* undefined extension */
      rv |= 0xdead0000; /* keep them honest */
      break;

    case RX_UWord:
      rv &=  0xffff;
      break;

    case RX_SWord:
      rv = sign_ext (rv, 16);
      break;

    case RX_3Byte:
      rv &= 0xffffff;
      break;

    case RX_Long:
      break;
    }
  return rv;
}
512 
/* Store V into operand I of the decoded instruction RD.

   V is first clipped or extended according to the operand's size
   code; RX_AnySize is accepted only for register destinations.  An
   operand of type None silently discards the value.  Register
   operands are written with put_reg, memory operands with the
   mem_put_* routine matching the size, and flag operands set or clear
   the selected PSW bit depending on whether V is nonzero.
   Pre-decrement and post-increment operands adjust the address
   register by the operand width as a side effect.

   Aborts (via rx_abort) for Immediate and Condition operands, and for
   a non-register operand whose size is RX_AnySize.  */
static void
put_op (const RX_Opcode_Decoded *rd, int i, int v)
{
  const RX_Opcode_Operand *o = rd->op + i;
  int addr;

  switch (o->size)
    {
    case RX_AnySize:
      if (o->type != RX_Operand_Register)
	rx_abort ();
      break;

    case RX_Byte: /* undefined extension */
      /* Deliberately pollute the upper bits so that code relying on
	 an unspecified extension is noticed.  */
      v |= 0xdeadbe00; /* keep them honest */
      break;

    case RX_UByte:
      v &= 0xff;
      break;

    case RX_SByte:
      v = sign_ext (v, 8);
      break;

    case RX_Word: /* undefined extension */
      v |= 0xdead0000; /* keep them honest */
      break;

    case RX_UWord:
      v &=  0xffff;
      break;

    case RX_SWord:
      v = sign_ext (v, 16);
      break;

    case RX_3Byte:
      v &= 0xffffff;
      break;

    case RX_Long:
      break;
    }

  switch (o->type)
    {
    case RX_Operand_None:
      /* Opcodes like TST and CMP use this.  */
      break;

    case RX_Operand_Immediate:	/* #addend */
    case RX_Operand_Condition:	/* eq, gtu, etc */
      /* rx_abort does not return, so there is no fall through.  */
      rx_abort ();

    case RX_Operand_Register:	/* Rn */
      put_reg (o->reg, v);
      RLD (o->reg);
      break;

    case RX_Operand_Predec:	/* [-Rn] */
      put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
      /* fall through */
    case RX_Operand_Postinc:	/* [Rn+] */
    case RX_Operand_Indirect:	/* [Rn + addend] */
    case RX_Operand_TwoReg:	/* [Rn + scale * R2] */

#ifdef CYCLE_ACCURATE
      /* Charge an extra cycle if regs.m2m is M2M_BOTH, and remember
	 that this insn wrote memory.  */
      if (regs.m2m == M2M_BOTH)
	{
	  tprintf("dst memory stall\n");
	  regs.cycle_count ++;
#ifdef CYCLE_STATS
	  memory_stalls ++;
#endif
	  regs.m2m = 0;
	}
      memory_dest = 1;
#endif

      /* Same address computation as in get_op.  */
      if (o->type == RX_Operand_TwoReg)
	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
      else
	addr = get_reg (o->reg) + o->addend;

      switch (o->size)
	{
	case RX_AnySize:
	  rx_abort ();

	case RX_Byte: /* undefined extension */
	case RX_UByte:
	case RX_SByte:
	  mem_put_qi (addr, v);
	  break;

	case RX_Word: /* undefined extension */
	case RX_UWord:
	case RX_SWord:
	  mem_put_hi (addr, v);
	  break;

	case RX_3Byte:
	  mem_put_psi (addr, v);
	  break;

	case RX_Long:
	  mem_put_si (addr, v);
	  break;
	}

      if (o->type == RX_Operand_Postinc)
	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);

      break;

    case RX_Operand_Flag:	/* [UIOSZC] */
      if (v)
	regs.r_psw |= (1 << o->reg);
      else
	regs.r_psw &= ~(1 << o->reg);
      break;
    }
}
637 
/* Operand accessors for use where a variable named "opcode" points at
   the current decoded instruction.  Operand 0 is the destination (D),
   operands 1 and 2 are the sources (S, S2).  PD/PS/PS2 store a value
   with put_op; GD/GS/GS2 fetch one with get_op.  */
#define PD(x) put_op (opcode, 0, x)
#define PS(x) put_op (opcode, 1, x)
#define PS2(x) put_op (opcode, 2, x)
#define GD() get_op (opcode, 0)
#define GS() get_op (opcode, 1)
#define GS2() get_op (opcode, 2)
/* Operand widths in bytes.  NOTE(review): SSZ and S2SZ index op[0],
   exactly like DSZ, rather than op[1] and op[2]; this looks like a
   copy-and-paste slip -- confirm before relying on either macro.  */
#define DSZ() size2bytes[opcode->op[0].size]
#define SSZ() size2bytes[opcode->op[0].size]
#define S2SZ() size2bytes[opcode->op[0].size]

/* "Universal" sources.  When operand 2 is absent the instruction has
   the form d OP= s, so the two inputs are operands 0 and 1; otherwise
   it has the form d = s OP s2 and the inputs are operands 1 and 2.  */
#define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
#define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
651 
652 static void
push(int val)653 push(int val)
654 {
655   int rsp = get_reg (sp);
656   rsp -= 4;
657   put_reg (sp, rsp);
658   mem_put_si (rsp, val);
659 }
660 
661 /* Just like the above, but tag the memory as "pushed pc" so if anyone
662    tries to write to it, it will cause an error.  */
663 static void
pushpc(int val)664 pushpc(int val)
665 {
666   int rsp = get_reg (sp);
667   rsp -= 4;
668   put_reg (sp, rsp);
669   mem_put_si (rsp, val);
670   mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);
671 }
672 
673 static int
pop()674 pop()
675 {
676   int rv;
677   int rsp = get_reg (sp);
678   rv = mem_get_si (rsp);
679   rsp += 4;
680   put_reg (sp, rsp);
681   return rv;
682 }
683 
684 static int
poppc()685 poppc()
686 {
687   int rv;
688   int rsp = get_reg (sp);
689   if (mem_get_content_type (rsp) != MC_PUSHED_PC)
690     execution_error (SIM_ERR_CORRUPT_STACK, rsp);
691   rv = mem_get_si (rsp);
692   mem_set_content_range (rsp, rsp+3, MC_UNINIT);
693   rsp += 4;
694   put_reg (sp, rsp);
695   return rv;
696 }
697 
/* Body of an add/subtract style instruction, for use inside
   decode_opcode (it relies on that function's locals uma, umb, ma,
   mb, ll and sll).  Computes "src1 VOP src2 VOP C" twice: once on the
   raw values widened to unsigned 64 bits (LL, used for the carry) and
   once on the values sign-extended from the destination width (SLL,
   the result that is stored and used for the other flags).

   The carry test compares LL with a bound chosen at compile time by
   "1 VOP 1": for + (nonzero) carry is set when LL exceeds the
   destination's mask; for - (zero) it is set when LL, viewed as
   signed, is greater than -1, i.e. when no borrow occurred.

   Note that the second source is fetched before the first.  */
#define MATH_OP(vop,c)				\
{ \
  umb = US2(); \
  uma = US1(); \
  ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
  tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
  ma = sign_ext (uma, DSZ() * 8);					\
  mb = sign_ext (umb, DSZ() * 8);					\
  sll = (long long) ma vop (long long) mb vop c; \
  tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
  set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
  PD (sll); \
  E (1);    \
}

/* Body of a bitwise instruction: combine the two universal sources
   with VOP, set the S and Z flags from the result, store it in the
   destination and charge one E cycle.  Uses decode_opcode's locals
   ma, mb and v.  */
#define LOGIC_OP(vop) \
{ \
  mb = US2(); \
  ma = US1(); \
  v = ma vop mb; \
  tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
  set_sz (v, DSZ()); \
  PD(v); \
  E (1); \
}

/* Body of a shift instruction.  VAL and COUNT are caller-supplied
   lvalues; VAL receives the first source cast to TYPE and is then
   shifted one bit at a time with OP (e.g. <<= or >>=), COUNT times.
   Before each step the bit selected by CARRY_MASK is remembered, so
   after the loop C holds the last bit shifted out.  The flags are
   updated only when COUNT is nonzero; the result is always stored.
   No cycles are charged here.  */
#define SHIFT_OP(val, type, count, OP, carry_mask)	\
{ \
  int i, c=0; \
  count = US2(); \
  val = (type)US1();				\
  tprintf("%lld " #OP " %d\n", val, count); \
  for (i = 0; i < count; i ++) \
    { \
      c = val & carry_mask; \
      val OP 1; \
    } \
  if (count) \
    set_oszc (val, 4, c); \
  PD (val); \
}
739 
/* Overlay of an int and a float, for reinterpreting the bits of one
   as the other.  */
typedef union {
  int i;
  float f;
} FloatInt;

/* Return the object representation of F as an int (no numeric
   conversion takes place).  */
static inline int
float2int (float f)
{
  int bits;

  memcpy (&bits, &f, sizeof bits);
  return bits;
}

/* Return the float whose object representation is the int I (no
   numeric conversion takes place).  */
static inline float
int2float (int i)
{
  float value;

  memcpy (&value, &i, sizeof value);
  return value;
}
760 
761 static int
fop_fadd(fp_t s1,fp_t s2,fp_t * d)762 fop_fadd (fp_t s1, fp_t s2, fp_t *d)
763 {
764   *d = rxfp_add (s1, s2);
765   return 1;
766 }
767 
768 static int
fop_fmul(fp_t s1,fp_t s2,fp_t * d)769 fop_fmul (fp_t s1, fp_t s2, fp_t *d)
770 {
771   *d = rxfp_mul (s1, s2);
772   return 1;
773 }
774 
775 static int
fop_fdiv(fp_t s1,fp_t s2,fp_t * d)776 fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
777 {
778   *d = rxfp_div (s1, s2);
779   return 1;
780 }
781 
782 static int
fop_fsub(fp_t s1,fp_t s2,fp_t * d)783 fop_fsub (fp_t s1, fp_t s2, fp_t *d)
784 {
785   *d = rxfp_sub (s1, s2);
786   return 1;
787 }
788 
/* Nonzero when a floating point exception should be raised: the bits
   of FPSW that are set and are either FPSWBITS_CE or lie in
   FPSWBITS_FMASK with the matching bit of (FPSW << FPSW_EFSH) also
   set.  NOTE(review): the shift presumably lines the exception enable
   bits up with the corresponding flag bits -- confirm in cpu.h.  */
#define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
/* Keep only the FPSW bits selected by FPSWBITS_CLEAR.  */
#define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
/* If an exception is pending, RETURN from the enclosing function with
   the result of do_fp_exception.  Requires a variable named opcode_pc
   in scope; expands to a bare "if" statement.  */
#define FPCHECK() \
  if (FPPENDING()) \
    return do_fp_exception (opcode_pc)

/* Body of a two-operand floating point instruction, for use inside
   decode_opcode (uses its local mb).  Fetches the source and then the
   destination, applies fop_FUNC, and leaves the enclosing function
   through FPCHECK -- before anything is stored -- if that raised a
   pending exception.  Otherwise the result is stored when the helper
   asks for it, and the S and Z flags are set from the result's sign
   bit and from whether its remaining 31 bits are all zero.  */
#define FLOAT_OP(func) \
{ \
  int do_store;   \
  fp_t fa, fb, fc; \
  FPCLEAR(); \
  fb = GS (); \
  fa = GD (); \
  do_store = fop_##func (fa, fb, &fc); \
  tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
  FPCHECK(); \
  if (do_store) \
    PD (fc);	\
  mb = 0; \
  if ((fc & 0x80000000UL) != 0) \
    mb |= FLAGBIT_S; \
  if ((fc & 0x7fffffffUL) == 0)			\
    mb |= FLAGBIT_Z; \
  set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
}

/* The carry flag as 0 or 1.  */
#define carry (FLAG_C ? 1 : 0)
816 
/* Description of each exception that generate_exception can raise,
   indexed by the EX_* constants: the address of its vector, a name
   for diagnostics (with the spelling of "privileged" corrected), and
   the host signal reported when running under GDB with no handler
   installed.  The table is read-only.  */
static const struct {
  unsigned long vaddr;
  const char *str;
  int signal;
} exception_info[] = {
  { 0xFFFFFFD0UL, "privileged opcode", SIGILL },
  { 0xFFFFFFD4UL, "access violation", SIGSEGV },
  { 0xFFFFFFDCUL, "undefined opcode", SIGILL },
  { 0xFFFFFFE4UL, "floating point", SIGFPE }
};
/* Indices into exception_info.  */
#define EX_PRIVILEDGED	0
#define EX_ACCESS	1
#define EX_UNDEFINED	2
#define EX_FLOATING	3
/* Raise exception N and RETURN from the enclosing function with the
   resulting status.  Requires a variable named opcode_pc in scope.  */
#define EXCEPTION(n)  \
  return generate_exception (n, opcode_pc)

/* Raise the privileged-opcode exception (and return) when FLAG_PM is
   set.  Expands to a bare "if" statement, so do not follow a use with
   "else".  */
#define PRIVILEDGED() \
  if (FLAG_PM) \
    EXCEPTION (EX_PRIVILEDGED)
837 
838 static int
generate_exception(unsigned long type,SI opcode_pc)839 generate_exception (unsigned long type, SI opcode_pc)
840 {
841   SI old_psw, old_pc, new_pc;
842 
843   new_pc = mem_get_si (exception_info[type].vaddr);
844   /* 0x00020000 is the value used to initialise the known
845      exception vectors (see rx.ld), but it is a reserved
846      area of memory so do not try to access it, and if the
847      value has not been changed by the program then the
848      vector has not been installed.  */
849   if (new_pc == 0 || new_pc == 0x00020000)
850     {
851       if (rx_in_gdb)
852 	return RX_MAKE_STOPPED (exception_info[type].signal);
853 
854       fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
855 	      exception_info[type].str, (unsigned long) opcode_pc);
856       if (type == EX_FLOATING)
857 	{
858 	  int mask = FPPENDING ();
859 	  fprintf (stderr, "Pending FP exceptions:");
860 	  if (mask & FPSWBITS_FV)
861 	    fprintf(stderr, " Invalid");
862 	  if (mask & FPSWBITS_FO)
863 	    fprintf(stderr, " Overflow");
864 	  if (mask & FPSWBITS_FZ)
865 	    fprintf(stderr, " Division-by-zero");
866 	  if (mask & FPSWBITS_FU)
867 	    fprintf(stderr, " Underflow");
868 	  if (mask & FPSWBITS_FX)
869 	    fprintf(stderr, " Inexact");
870 	  if (mask & FPSWBITS_CE)
871 	    fprintf(stderr, " Unimplemented");
872 	  fprintf(stderr, "\n");
873 	}
874       return RX_MAKE_EXITED (1);
875     }
876 
877   tprintf ("Triggering %s exception\n", exception_info[type].str);
878 
879   old_psw = regs.r_psw;
880   regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
881   old_pc = opcode_pc;
882   regs.r_pc = new_pc;
883   pushpc (old_psw);
884   pushpc (old_pc);
885   return RX_MAKE_STEPPED ();
886 }
887 
888 void
generate_access_exception(void)889 generate_access_exception (void)
890 {
891   int rv;
892 
893   rv = generate_exception (EX_ACCESS, regs.r_pc);
894   if (RX_EXITED (rv))
895     longjmp (decode_jmp_buf, rv);
896 }
897 
898 static int
do_fp_exception(unsigned long opcode_pc)899 do_fp_exception (unsigned long opcode_pc)
900 {
901   while (FPPENDING())
902     EXCEPTION (EX_FLOATING);
903   return RX_MAKE_STEPPED ();
904 }
905 
906 static int
op_is_memory(const RX_Opcode_Decoded * rd,int i)907 op_is_memory (const RX_Opcode_Decoded *rd, int i)
908 {
909   switch (rd->op[i].type)
910     {
911     case RX_Operand_Predec:
912     case RX_Operand_Postinc:
913     case RX_Operand_Indirect:
914       return 1;
915     default:
916       return 0;
917     }
918 }
/* op_is_memory applied to operand I of the current instruction; needs
   a variable named "opcode" in scope.  */
#define OM(i) op_is_memory (opcode, i)

/* Leave instruction decoding by jumping to decode_jmp_buf with status
   X.  The matching setjmp is not in this part of the file.  */
#define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
922 
923 int
decode_opcode()924 decode_opcode ()
925 {
926   unsigned int uma=0, umb=0;
927   int ma=0, mb=0;
928   int opcode_size, v;
929   unsigned long long ll;
930   long long sll;
931   unsigned long opcode_pc;
932   RX_Data rx_data;
933   const RX_Opcode_Decoded *opcode;
934 #ifdef CYCLE_STATS
935   unsigned long long prev_cycle_count;
936 #endif
937 #ifdef CYCLE_ACCURATE
938   unsigned int tx;
939 #endif
940 
941 #ifdef CYCLE_STATS
942   prev_cycle_count = regs.cycle_count;
943 #endif
944 
945 #ifdef CYCLE_ACCURATE
946   memory_source = 0;
947   memory_dest = 0;
948 #endif
949 
950   rx_cycles ++;
951 
952   maybe_get_mem_page (regs.r_pc);
953 
954   opcode_pc = regs.r_pc;
955 
956   /* Note that we don't word-swap this point, there's no point.  */
957   if (decode_cache_base[opcode_pc] == NULL)
958     {
959       RX_Opcode_Decoded *opcode_w;
960       rx_data.dpc = opcode_pc;
961       opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
962       opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
963 				      rx_get_byte, &rx_data);
964       opcode = opcode_w;
965     }
966   else
967     {
968       opcode = decode_cache_base[opcode_pc];
969       opcode_size = opcode->n_bytes;
970     }
971 
972 #ifdef CYCLE_ACCURATE
973   if (branch_alignment_penalty)
974     {
975       if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
976 	{
977 	  tprintf("1 cycle branch alignment penalty\n");
978 	  cycles (branch_alignment_penalty);
979 #ifdef CYCLE_STATS
980 	  branch_alignment_stalls ++;
981 #endif
982 	}
983       branch_alignment_penalty = 0;
984     }
985 #endif
986 
987   regs.r_pc += opcode_size;
988 
989   rx_flagmask = opcode->flags_s;
990   rx_flagand = ~(int)opcode->flags_0;
991   rx_flagor = opcode->flags_1;
992 
993   switch (opcode->id)
994     {
995     case RXO_abs:
996       sll = GS ();
997       tprintf("|%lld| = ", sll);
998       if (sll < 0)
999 	sll = -sll;
1000       tprintf("%lld\n", sll);
1001       PD (sll);
1002       set_osz (sll, 4);
1003       E (1);
1004       break;
1005 
1006     case RXO_adc:
1007       MATH_OP (+,carry);
1008       break;
1009 
1010     case RXO_add:
1011       MATH_OP (+,0);
1012       break;
1013 
1014     case RXO_and:
1015       LOGIC_OP (&);
1016       break;
1017 
1018     case RXO_bclr:
1019       ma = GD ();
1020       mb = GS ();
1021       if (opcode->op[0].type == RX_Operand_Register)
1022 	mb &= 0x1f;
1023       else
1024 	mb &= 0x07;
1025       ma &= ~(1 << mb);
1026       PD (ma);
1027       EBIT;
1028       break;
1029 
1030     case RXO_bmcc:
1031       ma = GD ();
1032       mb = GS ();
1033       if (opcode->op[0].type == RX_Operand_Register)
1034 	mb &= 0x1f;
1035       else
1036 	mb &= 0x07;
1037       if (GS2 ())
1038 	ma |= (1 << mb);
1039       else
1040 	ma &= ~(1 << mb);
1041       PD (ma);
1042       EBIT;
1043       break;
1044 
1045     case RXO_bnot:
1046       ma = GD ();
1047       mb = GS ();
1048       if (opcode->op[0].type == RX_Operand_Register)
1049 	mb &= 0x1f;
1050       else
1051 	mb &= 0x07;
1052       ma ^= (1 << mb);
1053       PD (ma);
1054       EBIT;
1055       break;
1056 
1057     case RXO_branch:
1058       if (opcode->op[1].type == RX_Operand_None || GS())
1059 	{
1060 #ifdef CYCLE_ACCURATE
1061 	  SI old_pc = regs.r_pc;
1062 	  int delta;
1063 #endif
1064 	  regs.r_pc = GD();
1065 #ifdef CYCLE_ACCURATE
1066 	  delta = regs.r_pc - old_pc;
1067 	  if (delta >= 0 && delta < 16
1068 	      && opcode_size > 1)
1069 	    {
1070 	      tprintf("near forward branch bonus\n");
1071 	      cycles (2);
1072 	    }
1073 	  else
1074 	    {
1075 	      cycles (3);
1076 	      branch_alignment_penalty = 1;
1077 	    }
1078 #ifdef CYCLE_STATS
1079 	  branch_stalls ++;
1080 #endif
1081 #endif
1082 	}
1083 #ifdef CYCLE_ACCURATE
1084       else
1085 	cycles (1);
1086 #endif
1087       break;
1088 
1089     case RXO_branchrel:
1090       if (opcode->op[1].type == RX_Operand_None || GS())
1091 	{
1092 	  int delta = GD();
1093 	  regs.r_pc = opcode_pc + delta;
1094 #ifdef CYCLE_ACCURATE
1095 	  /* Note: specs say 3, chip says 2.  */
1096 	  if (delta >= 0 && delta < 16
1097 	      && opcode_size > 1)
1098 	    {
1099 	      tprintf("near forward branch bonus\n");
1100 	      cycles (2);
1101 	    }
1102 	  else
1103 	    {
1104 	      cycles (3);
1105 	      branch_alignment_penalty = 1;
1106 	    }
1107 #ifdef CYCLE_STATS
1108 	  branch_stalls ++;
1109 #endif
1110 #endif
1111 	}
1112 #ifdef CYCLE_ACCURATE
1113       else
1114 	cycles (1);
1115 #endif
1116       break;
1117 
1118     case RXO_brk:
1119       {
1120 	int old_psw = regs.r_psw;
1121 	if (rx_in_gdb)
1122 	  DO_RETURN (RX_MAKE_HIT_BREAK ());
1123 	if (regs.r_intb == 0)
1124 	  {
1125 	    tprintf("BREAK hit, no vector table.\n");
1126 	    DO_RETURN (RX_MAKE_EXITED(1));
1127 	  }
1128 	regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1129 	pushpc (old_psw);
1130 	pushpc (regs.r_pc);
1131 	regs.r_pc = mem_get_si (regs.r_intb);
1132 	cycles(6);
1133       }
1134       break;
1135 
1136     case RXO_bset:
1137       ma = GD ();
1138       mb = GS ();
1139       if (opcode->op[0].type == RX_Operand_Register)
1140 	mb &= 0x1f;
1141       else
1142 	mb &= 0x07;
1143       ma |= (1 << mb);
1144       PD (ma);
1145       EBIT;
1146       break;
1147 
1148     case RXO_btst:
1149       ma = GS ();
1150       mb = GS2 ();
1151       if (opcode->op[1].type == RX_Operand_Register)
1152 	mb &= 0x1f;
1153       else
1154 	mb &= 0x07;
1155       umb = ma & (1 << mb);
1156       set_zc (! umb, umb);
1157       EBIT;
1158       break;
1159 
1160     case RXO_clrpsw:
1161       v = 1 << opcode->op[0].reg;
1162       if (FLAG_PM
1163 	  && (v == FLAGBIT_I
1164 	      || v == FLAGBIT_U))
1165 	break;
1166       regs.r_psw &= ~v;
1167       cycles (1);
1168       break;
1169 
1170     case RXO_div: /* d = d / s */
1171       ma = GS();
1172       mb = GD();
1173       tprintf("%d / %d = ", mb, ma);
1174       if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
1175 	{
1176 	  tprintf("#NAN\n");
1177 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1178 	  cycles (3);
1179 	}
1180       else
1181 	{
1182 	  v = mb/ma;
1183 	  tprintf("%d\n", v);
1184 	  set_flags (FLAGBIT_O, 0);
1185 	  PD (v);
1186 	  div_cycles (mb, ma);
1187 	}
1188       break;
1189 
1190     case RXO_divu: /* d = d / s */
1191       uma = GS();
1192       umb = GD();
1193       tprintf("%u / %u = ", umb, uma);
1194       if (uma == 0)
1195 	{
1196 	  tprintf("#NAN\n");
1197 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1198 	  cycles (2);
1199 	}
1200       else
1201 	{
1202 	  v = umb / uma;
1203 	  tprintf("%u\n", v);
1204 	  set_flags (FLAGBIT_O, 0);
1205 	  PD (v);
1206 	  divu_cycles (umb, uma);
1207 	}
1208       break;
1209 
1210     case RXO_emul:
1211       ma = GD ();
1212       mb = GS ();
1213       sll = (long long)ma * (long long)mb;
1214       tprintf("%d * %d = %lld\n", ma, mb, sll);
1215       put_reg (opcode->op[0].reg, sll);
1216       put_reg (opcode->op[0].reg + 1, sll >> 32);
1217       E2;
1218       break;
1219 
1220     case RXO_emulu:
1221       uma = GD ();
1222       umb = GS ();
1223       ll = (long long)uma * (long long)umb;
1224       tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
1225       put_reg (opcode->op[0].reg, ll);
1226       put_reg (opcode->op[0].reg + 1, ll >> 32);
1227       E2;
1228       break;
1229 
1230     case RXO_fadd:
1231       FLOAT_OP (fadd);
1232       E (4);
1233       break;
1234 
1235     case RXO_fcmp:
1236       ma = GD();
1237       mb = GS();
1238       FPCLEAR ();
1239       rxfp_cmp (ma, mb);
1240       FPCHECK ();
1241       E (1);
1242       break;
1243 
1244     case RXO_fdiv:
1245       FLOAT_OP (fdiv);
1246       E (16);
1247       break;
1248 
1249     case RXO_fmul:
1250       FLOAT_OP (fmul);
1251       E (3);
1252       break;
1253 
1254     case RXO_rtfi:
1255       PRIVILEDGED ();
1256       regs.r_psw = regs.r_bpsw;
1257       regs.r_pc = regs.r_bpc;
1258 #ifdef CYCLE_ACCURATE
1259       regs.fast_return = 0;
1260       cycles(3);
1261 #endif
1262       break;
1263 
1264     case RXO_fsub:
1265       FLOAT_OP (fsub);
1266       E (4);
1267       break;
1268 
1269     case RXO_ftoi:
1270       ma = GS ();
1271       FPCLEAR ();
1272       mb = rxfp_ftoi (ma, FPRM_ZERO);
1273       FPCHECK ();
1274       PD (mb);
1275       tprintf("(int) %g = %d\n", int2float(ma), mb);
1276       set_sz (mb, 4);
1277       E (2);
1278       break;
1279 
1280     case RXO_int:
1281       v = GS ();
1282       if (v == 255)
1283 	{
1284 	  int rc = rx_syscall (regs.r[5]);
1285 	  if (! RX_STEPPED (rc))
1286 	    DO_RETURN (rc);
1287 	}
1288       else
1289 	{
1290 	  int old_psw = regs.r_psw;
1291 	  regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1292 	  pushpc (old_psw);
1293 	  pushpc (regs.r_pc);
1294 	  regs.r_pc = mem_get_si (regs.r_intb + 4 * v);
1295 	}
1296       cycles (6);
1297       break;
1298 
1299     case RXO_itof:
1300       ma = GS ();
1301       FPCLEAR ();
1302       mb = rxfp_itof (ma, regs.r_fpsw);
1303       FPCHECK ();
1304       tprintf("(float) %d = %x\n", ma, mb);
1305       PD (mb);
1306       set_sz (ma, 4);
1307       E (2);
1308       break;
1309 
1310     case RXO_jsr:
1311     case RXO_jsrrel:
1312       {
1313 #ifdef CYCLE_ACCURATE
1314 	int delta;
1315 	regs.m2m = 0;
1316 #endif
1317 	v = GD ();
1318 #ifdef CYCLE_ACCURATE
1319 	regs.link_register = regs.r_pc;
1320 #endif
1321 	pushpc (get_reg (pc));
1322 	if (opcode->id == RXO_jsrrel)
1323 	  v += regs.r_pc;
1324 #ifdef CYCLE_ACCURATE
1325 	delta = v - regs.r_pc;
1326 #endif
1327 	put_reg (pc, v);
1328 #ifdef CYCLE_ACCURATE
1329 	/* Note: docs say 3, chip says 2 */
1330 	if (delta >= 0 && delta < 16)
1331 	  {
1332 	    tprintf ("near forward jsr bonus\n");
1333 	    cycles (2);
1334 	  }
1335 	else
1336 	  {
1337 	    branch_alignment_penalty = 1;
1338 	    cycles (3);
1339 	  }
1340 	regs.fast_return = 1;
1341 #endif
1342       }
1343       break;
1344 
1345     case RXO_machi:
1346       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1347       ll <<= 16;
1348       put_reg64 (acc64, ll + regs.r_acc);
1349       E1;
1350       break;
1351 
1352     case RXO_maclo:
1353       ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1354       ll <<= 16;
1355       put_reg64 (acc64, ll + regs.r_acc);
1356       E1;
1357       break;
1358 
1359     case RXO_max:
1360       mb = GS();
1361       ma = GD();
1362       if (ma > mb)
1363 	PD (ma);
1364       else
1365 	PD (mb);
1366       E (1);
1367       break;
1368 
1369     case RXO_min:
1370       mb = GS();
1371       ma = GD();
1372       if (ma < mb)
1373 	PD (ma);
1374       else
1375 	PD (mb);
1376       E (1);
1377       break;
1378 
1379     case RXO_mov:
1380       v = GS ();
1381 
1382       if (opcode->op[1].type == RX_Operand_Register
1383 	  && opcode->op[1].reg == 17 /* PC */)
1384 	{
1385 	  /* Special case.  We want the address of the insn, not the
1386 	     address of the next insn.  */
1387 	  v = opcode_pc;
1388 	}
1389 
1390       if (opcode->op[0].type == RX_Operand_Register
1391 	  && opcode->op[0].reg == 16 /* PSW */)
1392 	{
1393 	  /* Special case, LDC and POPC can't ever modify PM.  */
1394 	  int pm = regs.r_psw & FLAGBIT_PM;
1395 	  v &= ~ FLAGBIT_PM;
1396 	  v |= pm;
1397 	  if (pm)
1398 	    {
1399 	      v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1400 	      v |= pm;
1401 	    }
1402 	}
1403       if (FLAG_PM)
1404 	{
1405 	  /* various things can't be changed in user mode.  */
1406 	  if (opcode->op[0].type == RX_Operand_Register)
1407 	    if (opcode->op[0].reg == 32)
1408 	      {
1409 		v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1410 		v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1411 	      }
1412 	  if (opcode->op[0].reg == 34 /* ISP */
1413 	      || opcode->op[0].reg == 37 /* BPSW */
1414 	      || opcode->op[0].reg == 39 /* INTB */
1415 	      || opcode->op[0].reg == 38 /* VCT */)
1416 	    /* These are ignored.  */
1417 	    break;
1418 	}
1419       if (OM(0) && OM(1))
1420 	cycles (2);
1421       else
1422 	cycles (1);
1423 
1424       PD (v);
1425 
1426 #ifdef CYCLE_ACCURATE
1427       if ((opcode->op[0].type == RX_Operand_Predec
1428 	   && opcode->op[1].type == RX_Operand_Register)
1429 	  || (opcode->op[0].type == RX_Operand_Postinc
1430 	      && opcode->op[1].type == RX_Operand_Register))
1431 	{
1432 	  /* Special case: push reg doesn't cause a memory stall.  */
1433 	  memory_dest = 0;
1434 	  tprintf("push special case\n");
1435 	}
1436 #endif
1437 
1438       set_sz (v, DSZ());
1439       break;
1440 
1441     case RXO_movbi:
1442       PD (GS ());
1443       cycles (1);
1444       break;
1445 
1446     case RXO_movbir:
1447       PS (GD ());
1448       cycles (1);
1449       break;
1450 
1451     case RXO_mul:
1452       v = US2 ();
1453       ll = (unsigned long long) US1() * (unsigned long long) v;
1454       PD(ll);
1455       E (1);
1456       break;
1457 
1458     case RXO_mulhi:
1459       v = GS2 ();
1460       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16);
1461       ll <<= 16;
1462       put_reg64 (acc64, ll);
1463       E1;
1464       break;
1465 
1466     case RXO_mullo:
1467       v = GS2 ();
1468       ll = (long long)(signed short)(GS()) * (long long)(signed short)(v);
1469       ll <<= 16;
1470       put_reg64 (acc64, ll);
1471       E1;
1472       break;
1473 
1474     case RXO_mvfachi:
1475       PD (get_reg (acchi));
1476       E1;
1477       break;
1478 
1479     case RXO_mvfaclo:
1480       PD (get_reg (acclo));
1481       E1;
1482       break;
1483 
1484     case RXO_mvfacmi:
1485       PD (get_reg (accmi));
1486       E1;
1487       break;
1488 
1489     case RXO_mvtachi:
1490       put_reg (acchi, GS ());
1491       E1;
1492       break;
1493 
1494     case RXO_mvtaclo:
1495       put_reg (acclo, GS ());
1496       E1;
1497       break;
1498 
1499     case RXO_mvtipl:
1500       regs.r_psw &= ~ FLAGBITS_IPL;
1501       regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;
1502       E1;
1503       break;
1504 
1505     case RXO_nop:
1506     case RXO_nop2:
1507     case RXO_nop3:
1508       E1;
1509       break;
1510 
1511     case RXO_or:
1512       LOGIC_OP (|);
1513       break;
1514 
1515     case RXO_popm:
1516       /* POPM cannot pop R0 (sp).  */
1517       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1518 	EXCEPTION (EX_UNDEFINED);
1519       if (opcode->op[1].reg >= opcode->op[2].reg)
1520 	{
1521 	  regs.r_pc = opcode_pc;
1522 	  DO_RETURN (RX_MAKE_STOPPED (SIGILL));
1523 	}
1524       for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++)
1525 	{
1526 	  cycles (1);
1527 	  RLD (v);
1528 	  put_reg (v, pop ());
1529 	}
1530       break;
1531 
1532     case RXO_pushm:
1533       /* PUSHM cannot push R0 (sp).  */
1534       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1535 	EXCEPTION (EX_UNDEFINED);
1536       if (opcode->op[1].reg >= opcode->op[2].reg)
1537 	{
1538 	  regs.r_pc = opcode_pc;
1539 	  return RX_MAKE_STOPPED (SIGILL);
1540 	}
1541       for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--)
1542 	{
1543 	  RL (v);
1544 	  push (get_reg (v));
1545 	}
1546       cycles (opcode->op[2].reg - opcode->op[1].reg + 1);
1547       break;
1548 
1549     case RXO_racw:
1550       ll = get_reg64 (acc64) << GS ();
1551       ll += 0x80000000ULL;
1552       if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
1553 	ll = 0x00007fff00000000ULL;
1554       else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
1555 	ll = 0xffff800000000000ULL;
1556       else
1557 	ll &= 0xffffffff00000000ULL;
1558       put_reg64 (acc64, ll);
1559       E1;
1560       break;
1561 
1562     case RXO_rte:
1563       PRIVILEDGED ();
1564       regs.r_pc = poppc ();
1565       regs.r_psw = poppc ();
1566       if (FLAG_PM)
1567 	regs.r_psw |= FLAGBIT_U;
1568 #ifdef CYCLE_ACCURATE
1569       regs.fast_return = 0;
1570       cycles (6);
1571 #endif
1572       break;
1573 
1574     case RXO_revl:
1575       uma = GS ();
1576       umb = (((uma >> 24) & 0xff)
1577 	     | ((uma >> 8) & 0xff00)
1578 	     | ((uma << 8) & 0xff0000)
1579 	     | ((uma << 24) & 0xff000000UL));
1580       PD (umb);
1581       E1;
1582       break;
1583 
1584     case RXO_revw:
1585       uma = GS ();
1586       umb = (((uma >> 8) & 0x00ff00ff)
1587 	     | ((uma << 8) & 0xff00ff00UL));
1588       PD (umb);
1589       E1;
1590       break;
1591 
1592     case RXO_rmpa:
1593       RL(4);
1594       RL(5);
1595 #ifdef CYCLE_ACCURATE
1596       tx = regs.r[3];
1597 #endif
1598 
1599       while (regs.r[3] != 0)
1600 	{
1601 	  long long tmp;
1602 
1603 	  switch (opcode->size)
1604 	    {
1605 	    case RX_Long:
1606 	      ma = mem_get_si (regs.r[1]);
1607 	      mb = mem_get_si (regs.r[2]);
1608 	      regs.r[1] += 4;
1609 	      regs.r[2] += 4;
1610 	      break;
1611 	    case RX_Word:
1612 	      ma = sign_ext (mem_get_hi (regs.r[1]), 16);
1613 	      mb = sign_ext (mem_get_hi (regs.r[2]), 16);
1614 	      regs.r[1] += 2;
1615 	      regs.r[2] += 2;
1616 	      break;
1617 	    case RX_Byte:
1618 	      ma = sign_ext (mem_get_qi (regs.r[1]), 8);
1619 	      mb = sign_ext (mem_get_qi (regs.r[2]), 8);
1620 	      regs.r[1] += 1;
1621 	      regs.r[2] += 1;
1622 	      break;
1623 	    default:
1624 	      abort ();
1625 	    }
1626 	  /* We do the multiply as a signed value.  */
1627 	  sll = (long long)ma * (long long)mb;
1628 	  tprintf("        %016llx = %d * %d\n", sll, ma, mb);
1629 	  /* but we do the sum as unsigned, while sign extending the operands.  */
1630 	  tmp = regs.r[4] + (sll & 0xffffffffUL);
1631 	  regs.r[4] = tmp & 0xffffffffUL;
1632 	  tmp >>= 32;
1633 	  sll >>= 32;
1634 	  tmp += regs.r[5] + (sll & 0xffffffffUL);
1635 	  regs.r[5] = tmp & 0xffffffffUL;
1636 	  tmp >>= 32;
1637 	  sll >>= 32;
1638 	  tmp += regs.r[6] + (sll & 0xffffffffUL);
1639 	  regs.r[6] = tmp & 0xffffffffUL;
1640 	  tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1641 		  (unsigned long) regs.r[6],
1642 		  (unsigned long) regs.r[5],
1643 		  (unsigned long) regs.r[4]);
1644 
1645 	  regs.r[3] --;
1646 	}
1647       if (regs.r[6] & 0x00008000)
1648 	regs.r[6] |= 0xffff0000UL;
1649       else
1650 	regs.r[6] &= 0x0000ffff;
1651       ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
1652       if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
1653 	set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
1654       else
1655 	set_flags (FLAGBIT_O|FLAGBIT_S, ma);
1656 #ifdef CYCLE_ACCURATE
1657       switch (opcode->size)
1658 	{
1659 	case RX_Long:
1660 	  cycles (6 + 4 * tx);
1661 	  break;
1662 	case RX_Word:
1663 	  cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
1664 	  break;
1665 	case RX_Byte:
1666 	  cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
1667 	  break;
1668 	default:
1669 	  abort ();
1670 	}
1671 #endif
1672       break;
1673 
1674     case RXO_rolc:
1675       v = GD ();
1676       ma = v & 0x80000000UL;
1677       v <<= 1;
1678       v |= carry;
1679       set_szc (v, 4, ma);
1680       PD (v);
1681       E1;
1682       break;
1683 
1684     case RXO_rorc:
1685       uma = GD ();
1686       mb = uma & 1;
1687       uma >>= 1;
1688       uma |= (carry ? 0x80000000UL : 0);
1689       set_szc (uma, 4, mb);
1690       PD (uma);
1691       E1;
1692       break;
1693 
1694     case RXO_rotl:
1695       mb = GS ();
1696       uma = GD ();
1697       if (mb)
1698 	{
1699 	  uma = (uma << mb) | (uma >> (32-mb));
1700 	  mb = uma & 1;
1701 	}
1702       set_szc (uma, 4, mb);
1703       PD (uma);
1704       E1;
1705       break;
1706 
1707     case RXO_rotr:
1708       mb = GS ();
1709       uma = GD ();
1710       if (mb)
1711 	{
1712 	  uma = (uma >> mb) | (uma << (32-mb));
1713 	  mb = uma & 0x80000000;
1714 	}
1715       set_szc (uma, 4, mb);
1716       PD (uma);
1717       E1;
1718       break;
1719 
1720     case RXO_round:
1721       ma = GS ();
1722       FPCLEAR ();
1723       mb = rxfp_ftoi (ma, regs.r_fpsw);
1724       FPCHECK ();
1725       PD (mb);
1726       tprintf("(int) %g = %d\n", int2float(ma), mb);
1727       set_sz (mb, 4);
1728       E (2);
1729       break;
1730 
1731     case RXO_rts:
1732       {
1733 #ifdef CYCLE_ACCURATE
1734 	int cyc = 5;
1735 #endif
1736 	regs.r_pc = poppc ();
1737 #ifdef CYCLE_ACCURATE
1738 	/* Note: specs say 5, chip says 3.  */
1739 	if (regs.fast_return && regs.link_register == regs.r_pc)
1740 	  {
1741 #ifdef CYCLE_STATS
1742 	    fast_returns ++;
1743 #endif
1744 	    tprintf("fast return bonus\n");
1745 	    cyc -= 2;
1746 	  }
1747 	cycles (cyc);
1748 	regs.fast_return = 0;
1749 	branch_alignment_penalty = 1;
1750 #endif
1751       }
1752       break;
1753 
1754     case RXO_rtsd:
1755       if (opcode->op[2].type == RX_Operand_Register)
1756 	{
1757 	  int i;
1758 	  /* RTSD cannot pop R0 (sp).  */
1759 	  put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
1760 	  if (opcode->op[2].reg == 0)
1761 	    EXCEPTION (EX_UNDEFINED);
1762 #ifdef CYCLE_ACCURATE
1763 	  tx = opcode->op[0].reg - opcode->op[2].reg + 1;
1764 #endif
1765 	  for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
1766 	    {
1767 	      RLD (i);
1768 	      put_reg (i, pop ());
1769 	    }
1770 	}
1771       else
1772 	{
1773 #ifdef CYCLE_ACCURATE
1774 	  tx = 0;
1775 #endif
1776 	  put_reg (0, get_reg (0) + GS());
1777 	}
1778       put_reg (pc, poppc());
1779 #ifdef CYCLE_ACCURATE
1780       if (regs.fast_return && regs.link_register == regs.r_pc)
1781 	{
1782 	  tprintf("fast return bonus\n");
1783 #ifdef CYCLE_STATS
1784 	  fast_returns ++;
1785 #endif
1786 	  cycles (tx < 3 ? 3 : tx + 1);
1787 	}
1788       else
1789 	{
1790 	  cycles (tx < 5 ? 5 : tx + 1);
1791 	}
1792       regs.fast_return = 0;
1793       branch_alignment_penalty = 1;
1794 #endif
1795       break;
1796 
1797     case RXO_sat:
1798       if (FLAG_O && FLAG_S)
1799 	PD (0x7fffffffUL);
1800       else if (FLAG_O && ! FLAG_S)
1801 	PD (0x80000000UL);
1802       E1;
1803       break;
1804 
1805     case RXO_satr:
1806       if (FLAG_O && ! FLAG_S)
1807 	{
1808 	  put_reg (6, 0x0);
1809 	  put_reg (5, 0x7fffffff);
1810 	  put_reg (4, 0xffffffff);
1811 	}
1812       else if (FLAG_O && FLAG_S)
1813 	{
1814 	  put_reg (6, 0xffffffff);
1815 	  put_reg (5, 0x80000000);
1816 	  put_reg (4, 0x0);
1817 	}
1818       E1;
1819       break;
1820 
1821     case RXO_sbb:
1822       MATH_OP (-, ! carry);
1823       break;
1824 
1825     case RXO_sccnd:
1826       if (GS())
1827 	PD (1);
1828       else
1829 	PD (0);
1830       E1;
1831       break;
1832 
1833     case RXO_scmpu:
1834 #ifdef CYCLE_ACCURATE
1835       tx = regs.r[3];
1836 #endif
1837       while (regs.r[3] != 0)
1838 	{
1839 	  uma = mem_get_qi (regs.r[1] ++);
1840 	  umb = mem_get_qi (regs.r[2] ++);
1841 	  regs.r[3] --;
1842 	  if (uma != umb || uma == 0)
1843 	    break;
1844 	}
1845       if (uma == umb)
1846 	set_zc (1, 1);
1847       else
1848 	set_zc (0, ((int)uma - (int)umb) >= 0);
1849       cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));
1850       break;
1851 
1852     case RXO_setpsw:
1853       v = 1 << opcode->op[0].reg;
1854       if (FLAG_PM
1855 	  && (v == FLAGBIT_I
1856 	      || v == FLAGBIT_U))
1857 	break;
1858       regs.r_psw |= v;
1859       cycles (1);
1860       break;
1861 
1862     case RXO_smovb:
1863       RL (3);
1864 #ifdef CYCLE_ACCURATE
1865       tx = regs.r[3];
1866 #endif
1867       while (regs.r[3])
1868 	{
1869 	  uma = mem_get_qi (regs.r[2] --);
1870 	  mem_put_qi (regs.r[1]--, uma);
1871 	  regs.r[3] --;
1872 	}
1873 #ifdef CYCLE_ACCURATE
1874       if (tx > 3)
1875 	cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
1876       else
1877 	cycles (2 + 3 * (tx % 4));
1878 #endif
1879       break;
1880 
1881     case RXO_smovf:
1882       RL (3);
1883 #ifdef CYCLE_ACCURATE
1884       tx = regs.r[3];
1885 #endif
1886       while (regs.r[3])
1887 	{
1888 	  uma = mem_get_qi (regs.r[2] ++);
1889 	  mem_put_qi (regs.r[1]++, uma);
1890 	  regs.r[3] --;
1891 	}
1892       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1893       break;
1894 
1895     case RXO_smovu:
1896 #ifdef CYCLE_ACCURATE
1897       tx = regs.r[3];
1898 #endif
1899       while (regs.r[3] != 0)
1900 	{
1901 	  uma = mem_get_qi (regs.r[2] ++);
1902 	  mem_put_qi (regs.r[1]++, uma);
1903 	  regs.r[3] --;
1904 	  if (uma == 0)
1905 	    break;
1906 	}
1907       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1908       break;
1909 
1910     case RXO_shar: /* d = ma >> mb */
1911       SHIFT_OP (sll, int, mb, >>=, 1);
1912       E (1);
1913       break;
1914 
1915     case RXO_shll: /* d = ma << mb */
1916       SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);
1917       E (1);
1918       break;
1919 
1920     case RXO_shlr: /* d = ma >> mb */
1921       SHIFT_OP (ll, unsigned int, mb, >>=, 1);
1922       E (1);
1923       break;
1924 
1925     case RXO_sstr:
1926       RL (3);
1927 #ifdef CYCLE_ACCURATE
1928       tx = regs.r[3];
1929 #endif
1930       switch (opcode->size)
1931 	{
1932 	case RX_Long:
1933 	  while (regs.r[3] != 0)
1934 	    {
1935 	      mem_put_si (regs.r[1], regs.r[2]);
1936 	      regs.r[1] += 4;
1937 	      regs.r[3] --;
1938 	    }
1939 	  cycles (2 + tx);
1940 	  break;
1941 	case RX_Word:
1942 	  while (regs.r[3] != 0)
1943 	    {
1944 	      mem_put_hi (regs.r[1], regs.r[2]);
1945 	      regs.r[1] += 2;
1946 	      regs.r[3] --;
1947 	    }
1948 	  cycles (2 + (int)(tx / 2) + tx % 2);
1949 	  break;
1950 	case RX_Byte:
1951 	  while (regs.r[3] != 0)
1952 	    {
1953 	      mem_put_qi (regs.r[1], regs.r[2]);
1954 	      regs.r[1] ++;
1955 	      regs.r[3] --;
1956 	    }
1957 	  cycles (2 + (int)(tx / 4) + tx % 4);
1958 	  break;
1959 	default:
1960 	  abort ();
1961 	}
1962       break;
1963 
1964     case RXO_stcc:
1965       if (GS2())
1966 	PD (GS ());
1967       E1;
1968       break;
1969 
1970     case RXO_stop:
1971       PRIVILEDGED ();
1972       regs.r_psw |= FLAGBIT_I;
1973       DO_RETURN (RX_MAKE_STOPPED(0));
1974 
1975     case RXO_sub:
1976       MATH_OP (-, 0);
1977       break;
1978 
1979     case RXO_suntil:
1980       RL(3);
1981 #ifdef CYCLE_ACCURATE
1982       tx = 0;
1983 #endif
1984       if (regs.r[3] == 0)
1985 	{
1986 	  cycles (3);
1987 	  break;
1988 	}
1989       switch (opcode->size)
1990 	{
1991 	case RX_Long:
1992 	  uma = get_reg (2);
1993 	  while (regs.r[3] != 0)
1994 	    {
1995 	      regs.r[3] --;
1996 	      umb = mem_get_si (get_reg (1));
1997 	      regs.r[1] += 4;
1998 #ifdef CYCLE_ACCURATE
1999 	      tx ++;
2000 #endif
2001 	      if (umb == uma)
2002 		break;
2003 	    }
2004 #ifdef CYCLE_ACCURATE
2005 	  cycles (3 + 3 * tx);
2006 #endif
2007 	  break;
2008 	case RX_Word:
2009 	  uma = get_reg (2) & 0xffff;
2010 	  while (regs.r[3] != 0)
2011 	    {
2012 	      regs.r[3] --;
2013 	      umb = mem_get_hi (get_reg (1));
2014 	      regs.r[1] += 2;
2015 #ifdef CYCLE_ACCURATE
2016 	      tx ++;
2017 #endif
2018 	      if (umb == uma)
2019 		break;
2020 	    }
2021 #ifdef CYCLE_ACCURATE
2022 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2023 #endif
2024 	  break;
2025 	case RX_Byte:
2026 	  uma = get_reg (2) & 0xff;
2027 	  while (regs.r[3] != 0)
2028 	    {
2029 	      regs.r[3] --;
2030 	      umb = mem_get_qi (regs.r[1]);
2031 	      regs.r[1] += 1;
2032 #ifdef CYCLE_ACCURATE
2033 	      tx ++;
2034 #endif
2035 	      if (umb == uma)
2036 		break;
2037 	    }
2038 #ifdef CYCLE_ACCURATE
2039 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2040 #endif
2041 	  break;
2042 	default:
2043 	  abort();
2044 	}
2045       if (uma == umb)
2046 	set_zc (1, 1);
2047       else
2048 	set_zc (0, ((int)uma - (int)umb) >= 0);
2049       break;
2050 
2051     case RXO_swhile:
2052       RL(3);
2053 #ifdef CYCLE_ACCURATE
2054       tx = 0;
2055 #endif
2056       if (regs.r[3] == 0)
2057 	break;
2058       switch (opcode->size)
2059 	{
2060 	case RX_Long:
2061 	  uma = get_reg (2);
2062 	  while (regs.r[3] != 0)
2063 	    {
2064 	      regs.r[3] --;
2065 	      umb = mem_get_si (get_reg (1));
2066 	      regs.r[1] += 4;
2067 #ifdef CYCLE_ACCURATE
2068 	      tx ++;
2069 #endif
2070 	      if (umb != uma)
2071 		break;
2072 	    }
2073 #ifdef CYCLE_ACCURATE
2074 	  cycles (3 + 3 * tx);
2075 #endif
2076 	  break;
2077 	case RX_Word:
2078 	  uma = get_reg (2) & 0xffff;
2079 	  while (regs.r[3] != 0)
2080 	    {
2081 	      regs.r[3] --;
2082 	      umb = mem_get_hi (get_reg (1));
2083 	      regs.r[1] += 2;
2084 #ifdef CYCLE_ACCURATE
2085 	      tx ++;
2086 #endif
2087 	      if (umb != uma)
2088 		break;
2089 	    }
2090 #ifdef CYCLE_ACCURATE
2091 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2092 #endif
2093 	  break;
2094 	case RX_Byte:
2095 	  uma = get_reg (2) & 0xff;
2096 	  while (regs.r[3] != 0)
2097 	    {
2098 	      regs.r[3] --;
2099 	      umb = mem_get_qi (regs.r[1]);
2100 	      regs.r[1] += 1;
2101 #ifdef CYCLE_ACCURATE
2102 	      tx ++;
2103 #endif
2104 	      if (umb != uma)
2105 		break;
2106 	    }
2107 #ifdef CYCLE_ACCURATE
2108 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2109 #endif
2110 	  break;
2111 	default:
2112 	  abort();
2113 	}
2114       if (uma == umb)
2115 	set_zc (1, 1);
2116       else
2117 	set_zc (0, ((int)uma - (int)umb) >= 0);
2118       break;
2119 
2120     case RXO_wait:
2121       PRIVILEDGED ();
2122       regs.r_psw |= FLAGBIT_I;
2123       DO_RETURN (RX_MAKE_STOPPED(0));
2124 
2125     case RXO_xchg:
2126 #ifdef CYCLE_ACCURATE
2127       regs.m2m = 0;
2128 #endif
2129       v = GS (); /* This is the memory operand, if any.  */
2130       PS (GD ()); /* and this may change the address register.  */
2131       PD (v);
2132       E2;
2133 #ifdef CYCLE_ACCURATE
2134       /* all M cycles happen during xchg's cycles.  */
2135       memory_dest = 0;
2136       memory_source = 0;
2137 #endif
2138       break;
2139 
2140     case RXO_xor:
2141       LOGIC_OP (^);
2142       break;
2143 
2144     default:
2145       EXCEPTION (EX_UNDEFINED);
2146     }
2147 
2148 #ifdef CYCLE_ACCURATE
2149   regs.m2m = 0;
2150   if (memory_source)
2151     regs.m2m |= M2M_SRC;
2152   if (memory_dest)
2153     regs.m2m |= M2M_DST;
2154 
2155   regs.rt = new_rt;
2156   new_rt = -1;
2157 #endif
2158 
2159 #ifdef CYCLE_STATS
2160   if (prev_cycle_count == regs.cycle_count)
2161     {
2162       printf("Cycle count not updated! id %s\n", id_names[opcode->id]);
2163       abort ();
2164     }
2165 #endif
2166 
2167 #ifdef CYCLE_STATS
2168   if (running_benchmark)
2169     {
2170       int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);
2171 
2172 
2173       cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
2174       times_per_id[opcode->id][omap] ++;
2175 
2176       times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
2177 
2178       prev_opcode_id = opcode->id;
2179       po0 = omap;
2180     }
2181 #endif
2182 
2183   return RX_MAKE_STEPPED ();
2184 }
2185 
2186 #ifdef CYCLE_STATS
2187 void
reset_pipeline_stats(void)2188 reset_pipeline_stats (void)
2189 {
2190   memset (cycles_per_id, 0, sizeof(cycles_per_id));
2191   memset (times_per_id, 0, sizeof(times_per_id));
2192   memory_stalls = 0;
2193   register_stalls = 0;
2194   branch_stalls = 0;
2195   branch_alignment_stalls = 0;
2196   fast_returns = 0;
2197   memset (times_per_pair, 0, sizeof(times_per_pair));
2198   running_benchmark = 1;
2199 
2200   benchmark_start_cycle = regs.cycle_count;
2201 }
2202 
2203 void
halt_pipeline_stats(void)2204 halt_pipeline_stats (void)
2205 {
2206   running_benchmark = 0;
2207   benchmark_end_cycle = regs.cycle_count;
2208 }
2209 #endif
2210 
/* Print the collected cycle statistics on stdout.

   With CYCLE_ACCURATE the total cycle count is printed first; when
   `verbose' is exactly 1 that bare number is the only output.  With
   CYCLE_STATS this is followed by the benchmark cycle span (only if
   a non-zero start cycle was recorded), one row per executed
   opcode/operand-map combination, one row per observed pair of
   consecutive opcodes, and finally the stall and fast-return
   counters.  If neither macro is defined nothing is printed.  */
void
pipeline_stats (void)
{
#ifdef CYCLE_STATS
  int id, map;
  int prev_id, prev_map;
#endif

#ifdef CYCLE_ACCURATE
  if (verbose == 1)
    {
      /* Terse mode: just the raw count, then stop.  */
      printf ("cycles: %llu\n", regs.cycle_count);
      return;
    }

  printf ("cycles: %13s\n", comma (regs.cycle_count));
#endif

#ifdef CYCLE_STATS
  if (benchmark_start_cycle)
    printf ("bmark:  %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));

  /* Per-opcode table: total cycles, execution count, average cycles,
     operand map, opcode name.  */
  printf ("\n");
  for (id = 0; id < N_RXO; ++id)
    {
      for (map = 0; map < N_MAP; ++map)
	{
	  /* Skip combinations that never executed; this also keeps
	     the average below from dividing by zero.  */
	  if (times_per_id[id][map] == 0)
	    continue;

	  /* The +4 steps over the first four characters of the name
	     (the "RXO_" prefix in the id_names table).  */
	  printf ("%13s %13s %7.2f  %s %s\n",
		  comma (cycles_per_id[id][map]),
		  comma (times_per_id[id][map]),
		  (double)cycles_per_id[id][map] / times_per_id[id][map],
		  op_cache_string (map),
		  id_names[id]+4);
	}
    }

  /* Pair table: how often each (previous opcode, map) was followed
     by each (opcode, map).  */
  printf ("\n");
  for (prev_id = 0; prev_id < N_RXO; ++prev_id)
    {
      for (prev_map = 0; prev_map < N_MAP; ++prev_map)
	{
	  for (id = 0; id < N_RXO; ++id)
	    {
	      for (map = 0; map < N_MAP; ++map)
		{
		  if (times_per_pair[prev_id][prev_map][id][map] == 0)
		    continue;

		  printf ("%13s   %s %-9s  ->  %s %s\n",
			  comma (times_per_pair[prev_id][prev_map][id][map]),
			  op_cache_string (prev_map),
			  id_names[prev_id]+4,
			  op_cache_string (map),
			  id_names[id]+4);
		}
	    }
	}
    }

  /* Scalar event counters.  */
  printf ("\n");
  printf ("%13s memory stalls\n", comma (memory_stalls));
  printf ("%13s register stalls\n", comma (register_stalls));
  printf ("%13s branches taken (non-return)\n", comma (branch_stalls));
  printf ("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
  printf ("%13s fast returns\n", comma (fast_returns));
#endif
}
2267