1 /* sim.c: simulator functions
2  * $Id: sim.c,v 1.4 2004/02/22 06:54:25 varfar Exp $
3  */
4 
5 /* This file is part of `exhaust', a memory array redcode simulator.
6  * Author: M Joonas Pihlaja
7  * Public Domain.
8  */
9 
10 /*
11  * Thanks go to the pMARS authors and Ken Espiritu whose ideas have
12  * been used in this simulator.  Especially Ken's effective addressing
13  * calculation code in pMARS 0.8.6 has been adapted for use here.
14  */
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #include "exhaust.h"
21 #include "asm.h"
22 #include "sim.h"
23 #include "insn.h"
24 
25 /* Should we strip flags from instructions when loading?  By default,
26    yes.  If so, then the simulator won't bother masking them off.  */
27 #ifndef SIM_STRIP_FLAGS
28 #define SIM_STRIP_FLAGS 1
29 #endif
30 
31 
32 /* DEBUG level:
33  *     0: none
34  *   >=1: disassemble each instruction (no output)
35  *     2: print each instruction as it is executed
36  */
37 
38 /*
39  * File scoped stuff
40  */
41 
42 /* internal warrior structure */
/* internal warrior structure.  Warriors that are still alive form a
 * doubly-linked circular list through succ/pred; each warrior owns a
 * sliding slice of the shared process-queue buffer via head/tail. */
typedef struct w_st {
  field_t *tail;		/* next free location to queue a process */
  field_t *head;		/* next process to run from queue */
  struct w_st *succ;		/* next warrior alive */
  u32_t nprocs;			/* number of live processes in this warrior */
  struct w_st *pred;		/* previous warrior alive */
  int id;			/* index (or identity) of warrior */
} w_t;
51 
/* Default parameters used by sim() when asked to self-allocate
 * buffers (nwar == -1): standard '94 nop settings. */
#define DEF_MAX_WARS 2
#define DEF_CORESIZE 8000
#define DEF_PROCESSES 8000
#define DEF_CYCLES 80000

/* Current simulation parameters; zero until sim_alloc_bufs*() runs. */
static unsigned int Coresize  = 0;
static unsigned int Processes = 0;
static unsigned int NWarriors = 0;
static unsigned int Cycles    = 0;

/* Buffers owned by sim_alloc_bufs*()/sim_free_bufs(). */
static w_t          *War_Tab = NULL;	/* per-warrior bookkeeping */
static insn_t       *Core_Mem = NULL;	/* the core itself */
static field_t      *Queue_Mem = NULL;	/* shared cyclic process queue */

/* P-space */
static unsigned int PSpace_size = 0;    /* # p-space slots per warrior. */
static pspace_t	    **PSpaces;	        /* p-spaces of each warrior. */

/* protos */
static int sim_proper( unsigned int, const field_t *, unsigned int * );
static void alloc_pspaces( unsigned int nwars, unsigned int pspacesize );
static void free_pspaces( unsigned int nwars );

/*** exec_trail support ***/

/* Optional per-instruction callback; NULL when tracing is disabled. */
static step_cb_t step_cb = NULL;
78 
/* Register a callback to be invoked once per executed instruction
 * (exec_trail support).  Pass NULL to disable tracing again. */
void
set_step_cb(step_cb_t callback)
{
  step_cb = callback;
}
82 
83 /*---------------------------------------------------------------
84  * Simulator memory management
85  */
86 
87 void
sim_clear_core()88 sim_clear_core()
89 {
90   memset(Core_Mem, 0, Coresize*sizeof(insn_t));
91 }
92 
93 
94 /* NAME
95  *     sim_alloc_bufs, sim_alloc_bufs2, sim_free_bufs --
96  *				alloc and free buffers used in simulation
97  *
98  * SYNOPSIS
99  *     insn_t *sim_alloc_bufs( unsigned int nwars, unsigned int coresize,
100  *                             unsigned int processes, unsigned int cycles );
101  *     insn_t *sim_alloc_bufs2( unsigned int nwars, unsigned int coresize,
102  *				unsigned int processes, unsigned int cycles,
103  *				unsigned int pspacesize );
104  *     void sim_free_bufs();
105  *
106  * INPUTS
107  *     nwar        -- number of warriors
108  *     coresize    -- size of core
109  *     processes   -- max no of processes / warrior
110  *     cycles      -- the number of cycles to play before tie.
 *     pspacesize  -- size of p-space per warrior.  For sim_alloc_bufs(),
 *		      it defaults to max(1, coresize/16).
113  *
114  * RESULTS
115  *     These functions manage the core, queue, and w_t warrior info
116  *     struct array memories.
117  *
118  *     Core_Mem, Queue_Mem, War_Tab and PSpace_mem memories are allocated
119  *     or freed as requested.  Any earlier memories are freed before
120  *     new allocations are made.
121  *
122  * RETURN VALUE
123  *     sim_alloc_bufs(): the address of core memory, or NULL if
124  *			 out of memory.
125  *     sim_free_bufs(): none
126  *
127  * GLOBALS
128  *     All file scoped globals.
129  */
130 
131 void
sim_free_bufs()132 sim_free_bufs()
133 {
134   free_pspaces(NWarriors);
135   if ( Core_Mem ) free( Core_Mem ); Core_Mem = NULL; Coresize = 0;
136   if ( Queue_Mem ) free( Queue_Mem ); Queue_Mem = NULL; Processes = 0;
137   if ( War_Tab ) free( War_Tab ); War_Tab = NULL; NWarriors = 0;
138 }
139 
140 
141 insn_t *
sim_alloc_bufs2(unsigned int nwars,unsigned int coresize,unsigned int processes,unsigned int cycles,unsigned int pspace)142 sim_alloc_bufs2( unsigned int nwars, unsigned int coresize,
143 		 unsigned int processes, unsigned int cycles,
144 		 unsigned int pspace )
145 {
146   unsigned int queue_size;
147 
148   sim_free_bufs();
149 
150   Core_Mem = (insn_t*)malloc( sizeof(insn_t) * coresize );
151   queue_size = nwars*processes+1;
152   Queue_Mem = (field_t*)malloc( sizeof(field_t)*queue_size );
153   War_Tab = (w_t*)malloc( sizeof(w_t)*nwars );
154   alloc_pspaces(nwars, pspace);
155 
156   if ( Core_Mem && Queue_Mem && War_Tab && PSpaces ) {
157     Cycles = cycles;
158     NWarriors = nwars;
159     Coresize = coresize;
160     Processes = processes;
161     sim_clear_pspaces();
162     return Core_Mem;
163   }
164   sim_free_bufs();
165   return NULL;
166 }
167 
168 insn_t *
sim_alloc_bufs(unsigned int nwars,unsigned int coresize,unsigned int processes,unsigned int cycles)169 sim_alloc_bufs( unsigned int nwars, unsigned int coresize,
170 		unsigned int processes, unsigned int cycles )
171 {
172   unsigned int pspace;
173   pspace = coresize/16 == 0 ? 1 : coresize/16;
174   return sim_alloc_bufs2( nwars, coresize, processes, cycles, pspace );
175 }
176 
177 
178 /* NAME
179  *     sim_load_warrior -- load warrior code into core.
180  *
181  * SYNOPSIS
182  *     sim_load_warrior(unsigned int pos, insn_t *code, unsigned int len);
183  *
184  * INPUTS
185  *     pos -- The core address to load the warrior into.
186  *     code -- array of instructions.
 *     len -- number of instructions in the code[] array.
188  *
189  * DESCRIPTION
190  *     This function is the preferred method to load warriors into core.
191  *     It strips the instructions of any flags they may possess, and
192  *     copies the instructions into core.
193  *
194  *     The code will happily overwrite any previous contents of core,
195  *     so beware not to load warriors so that their code overlaps.
196  *
197  * NOTE
198  *     The core must have been allocated previously with sim(), or
199  *     preferably sim_alloc_bufs() and not freed since.
200  *
201  * RETURN VALUE
202  *     0 -- warrior loaded OK.
203  *    -1 -- core memory not allocated.
204  *    -2 -- warrior length > core size.
205  */
206 int
sim_load_warrior(unsigned int pos,const insn_t * code,unsigned int len)207 sim_load_warrior(unsigned int pos, const insn_t *code, unsigned int len)
208 {
209   unsigned int i;
210   field_t k;
211   u32_t in;
212 
213   if ( Core_Mem == NULL )  return -1;
214   if ( len > Coresize ) return -2;
215 
216   for (i=0; i<len; i++) {
217     k = (pos+i) % Coresize;
218 
219 #if SIM_STRIP_FLAGS
220     in = code[i].in & iMASK;
221 #else
222     in = code[i].in;
223 #endif
224 
225     Core_Mem[k].in = in;
226     Core_Mem[k].a = code[i].a;
227     Core_Mem[k].b = code[i].b;
228   }
229   return 0;
230 }
231 
232 
233 
234 
235 /*---------------------------------------------------------------
236  * P-Space management.
237  */
238 
239 static void
free_pspaces(unsigned int nwars)240 free_pspaces(unsigned int nwars)
241 {
242   unsigned int i;
243   if ( nwars>0 ) {
244     if (PSpaces) {
245       for (i=0; i<nwars; i++) {
246 	pspace_free(PSpaces[i]);
247       }
248       free(PSpaces);
249     }
250   }
251   PSpace_size = 0;
252   PSpaces = NULL;
253 }
254 
255 
256 static void
alloc_pspaces(unsigned int nwars,unsigned int pspacesize)257 alloc_pspaces(unsigned int nwars, unsigned int pspacesize)
258 {
259   unsigned int i;
260   int success = 0;
261 
262   PSpaces = NULL;
263   PSpace_size = 0;
264   if (nwars==0) { return; }
265 
266   if (( PSpaces = (pspace_t**)malloc(sizeof(pspace_t*)*nwars))) {
267     success = 1;
268     for (i=0; i<nwars; i++) { PSpaces[i] = NULL; }
269     for (i=0; success && i<nwars; i++) {
270 	PSpaces[i] = pspace_alloc(pspacesize);
271 	success = success && PSpaces[i] != NULL;
272     }
273   }
274 
275   if ( !success ) {
276     free_pspaces(nwars);
277   } else {
278     PSpace_size = pspacesize;
279   }
280 }
281 
282 
283 /* NAME
284  *     sim_get_pspaces -- get array of p-spaces of warriors.
285  *     sim_get_pspace -- get a single p-space.
286  *
287  * SYNOPSIS
288  *     pspace_t **sim_get_pspaces(void);
289  *     pspace_t *sim_get_pspace(unsigned int war_id)
290  *
291  * DESCRIPTION
292  *     Return an array of pointers to p-space structures of the warriors.
293  *     The pspace at index i is used as the pspace of the ith warrior
294  *     when fighting.
295  */
/* Return the array of all warriors' p-space pointers, indexed by
 * warrior id.  Ownership stays with the simulator. */
pspace_t **
sim_get_pspaces()
{
    return PSpaces;
}
301 
/* Return the p-space of a single warrior.
 * NOTE(review): war_id is not bounds-checked against NWarriors --
 * callers must pass a valid index. */
pspace_t *
sim_get_pspace(unsigned int war_id)
{
    return PSpaces[war_id];
}
307 
308 /* NAME
309  *     sim_clear_pspaces -- clear and/or reinitialise p-spaces
310  *     sim_reset_pspaces
311  *
312  * SYNOPSIS
313  *     void sim_clear_pspaces(void);
314  *     void sim_reset_pspaces(void);
315  *
316  * DESCRIPTION
317  *	All p-spaces are cleared and the P-space locations 0 are set
318  *	to CORESIZE-1.  For sim_reset_pspaces(): All p-spaces are made
319  *	private.
320  * */
321 
322 void
sim_clear_pspaces()323 sim_clear_pspaces()
324 {
325   unsigned int i;
326   for (i=0; i<NWarriors; i++) {
327     pspace_clear(PSpaces[i]);
328     pspace_set(PSpaces[i], 0, Coresize-1);
329   }
330 }
331 
332 void
sim_reset_pspaces()333 sim_reset_pspaces()
334 {
335   unsigned int i;
336   for (i=0; i<NWarriors; i++) {
337     pspace_privatise(PSpaces[i]);
338   }
339   sim_clear_pspaces();
340 }
341 
342 
343 
344 /*---------------------------------------------------------------
345  * Simulator interface
346  */
347 
348 /* NAME
349  *     sim, sim_mw -- public functions to simulate a round of Core War
350  *
351  * SYNOPSIS
352  *     int sim_mw( unsigned int nwar, const field_t *war_pos_tab,
353  *                 unsigned int *death_tab );
354  *     int sim( int nwar, field_t w1_start, field_t w2_start,
355  * 		unsigned int cycles, void **ptr_result );
356  *
357  * INPUTS
358  *     nwar        -- number of warriors
359  *     w1_start, w2_start -- core addresses of first processes
360  *                    warrior 1 and warrior 2. Warrior 1 executes first.
361  *     cycles      -- the number of cycles to play before tie.
362  *     ptr_result  -- NULL, except when requesting the address of core.
363  *     war_pos_tab -- core addresses where warriors are loaded in
364  *		      the order they are to be executed.
365  *     death_tab   -- the table where dead warrior indices are stored
366  *
367  * DESCRIPTION
368  *     The real simulator is inside sim_proper() to which sim() and
 *     sim_mw() are proxies.  sim_mw() reads the warrior position
 *     of the ith warrior from war_pos_tab[i-1].
371  *
372  * RESULTS
373  *     The warriors fight their fight in core which gets messed up in
374  *     the process.  If a warrior died during the fight then its p-space
375  *     location 0 is cleared.  Otherwise the number of warriors alive
376  *     at the end of the battle is stored into its p-space location 0.
377  *
378  *     sim_mw() stores indices of warriors that die into the death_tab
379  *     array in the order of death.  Warrior indices start from 0.
380  *
381  *     For sim(): If nwar == -1 then buffers of default size for
382  *     max. two warriors are allocated and the address of the core
383  *     memory is returned via the ptr_result pointer.
384  *
385  * RETURN VALUE
386  *     sim_mw(): the number of warriors still alive at the end of the
387  *		 battle.
388  *       -1: simulator panic attack -- something's gone wrong
389  *
390  *     sim():
391  *       single warrior: 0: warrior suicided, 1: warrior didn't die.
392  *       one-on-one two warriors:
393  * 	      0: warrior 1 won, 1: warrior 2 won, 2: tie
394  *       -1: simulator panic attack -- something's gone wrong
395  *
396  * GLOBALS
397  *     All file scoped globals */
398 
399 int
sim_mw(unsigned int nwar,const field_t * war_pos_tab,unsigned int * death_tab)400 sim_mw( unsigned int nwar, const field_t *war_pos_tab,
401 	unsigned int *death_tab )
402 {
403   int alive_count;
404   if ( !Core_Mem || !Queue_Mem || !War_Tab || !PSpaces ) return -1;
405 
406   alive_count = sim_proper( nwar, war_pos_tab, death_tab );
407 
408   /* Update p-space locations 0. */
409   if (alive_count >= 0) {
410     unsigned int nalive = alive_count;
411     unsigned int i;
412 
413     for (i=0; i<nwar; i++) {
414       pspace_set( PSpaces[i], 0, nalive);
415     }
416     for (i=0; i<nwar-nalive; i++) {
417       pspace_set( PSpaces[death_tab[i]], 0, 0);
418     }
419   }
420   return alive_count;
421 }
422 
423 
424 int
sim(int nwar,field_t w1_start,field_t w2_start,unsigned int cycles,void ** ptr_result)425 sim( int nwar,
426      field_t w1_start,
427      field_t w2_start,
428      unsigned int cycles,
429      void **ptr_result )
430 {
431   field_t war_pos_tab[2];
432   unsigned int death_tab[2];
433   int alive_cnt;
434 
435   /* if the caller requests for the address of core, allocate
436    * the default buffers and give it
437    */
438   if ( nwar < 0 ) {
439     if ( nwar == -1 && ptr_result ) {
440       *ptr_result = sim_alloc_bufs( DEF_MAX_WARS, DEF_CORESIZE,
441 				    DEF_PROCESSES, DEF_CYCLES );
442       return 0;
443     }
444     return -1;
445   }
446   if ( nwar > 2 ) return -1;
447 
448   /* otherwise set up things for sim_mw() */
449   Cycles = cycles;
450   war_pos_tab[0] = w1_start;
451   war_pos_tab[1] = w2_start;
452 
453   alive_cnt = sim_mw( nwar, war_pos_tab, death_tab );
454   if ( alive_cnt < 0 ) return -1;
455 
456   if ( nwar == 1) return alive_cnt;
457 
458   if ( alive_cnt == 2 ) return 2;
459   return death_tab[0] == 0 ? 1 : 0;
460 }
461 
462 
463 
464 /*-------------------------------------------------------------------------
465  * private functions
466  */
467 
468 /* NAME
469  *     sim_proper -- the real simulator code
470  *
471  * SYNOPSIS
472  *     int sim_proper( unsigned int nwar,
473  *                     const field_t *war_pos_tab,
474  *                     unsigned int *death_tab );
475  *
476  * INPUTS
477  *     nwar        -- number of warriors
478  *     war_pos_tab -- core addresses where warriors are loaded in
479  *		      the order they are to be executed.
480  *     death_tab   -- the table where dead warrior indices are stored
481  *
482  * RESULTS
483  *     The warriors fight their fight in core which gets messed up in
484  *     the process.  The indices of warriors that die are stored into
485  *     the death_tab[] array in the order of death.  Warrior indices
486  *     start from 0.
487  *
488  * RETURN VALUE
489  *     The number of warriors still alive at the end of the
490  *     battle or -1 on an anomalous condition.
491  *
492  * GLOBALS
493  *     All file scoped globals
494  */
495 
496 /* Various macros:
497  *
498  *  queue(x): Inserts a core address 'x' to the head of the current
499  *            warrior's process queue.  Assumes the warrior's
500  * 	      tail pointer is inside the queue buffer.
501  *
502  * x, y must be in 0..coresize-1 for the following macros:
503  *
504  * INCMOD(x): x = x+1 mod coresize
505  * DECMOD(x): x = x-1 mod coresize
506  * ADDMOD(z,x,y): z = x+y mod coresize
507  * SUBMOD(z,x,y): z = x-y mod coresize
508  */
509 
/* queue(x): push core address x to the current warrior's queue tail,
 * wrapping the tail pointer around the cyclic queue buffer. */
#define queue(x)  do { *w->tail++ = (x); if ( w->tail == queue_end )\
                                          w->tail = queue_start; } while (0)

/* Modular arithmetic helpers; operands must already lie in
 * 0..coresize-1 (coresize1 == coresize-1). */
#define INCMOD(x) do { if ( ++(x) == coresize ) (x) = 0; } while (0)
#define DECMOD(x) do { if ((x)-- == 0) (x) = coresize1; } while (0)
#define ADDMOD(z,x,y) do { (z) = (x)+(y); if ((z)>=coresize) (z) -= coresize; } while (0)
#define SUBMOD(z,x,y) do { (z) = (x)-(y); if ((int)(z)<0) (z) += coresize; } while (0)

/* private macros to access p-space.  P-space address 0 aliases the
 * warrior's 'lastresult' cell instead of mem[0]. */
#define UNSAFE_PSPACE_SET(warid, paddr, val) do {\
    if (paddr) {\
	pspaces_[(warid)]->mem[(paddr)] = (val);\
    } else {\
	pspaces_[(warid)]->lastresult = (val);\
    }\
} while(0)

#define UNSAFE_PSPACE_GET(warid, paddr) \
	( (paddr) ? pspaces_[(warid)]->mem[(paddr)]\
		  : pspaces_[(warid)]->lastresult )
530 
531 
int
sim_proper( unsigned int nwar, const field_t *war_pos_tab,
	    unsigned int *death_tab )
{
  /*
   * Core and Process queue memories.
   *
   * The warriors share a common cyclic buffer for use as a process
   * queue which contains the core addresses where active processes
   * are.  The buffer has size N*P+1, where N = number of warriors,
   * P = maximum number of processes / warrior.
   *
   * Each warrior has a fixed slice of the buffer for its own process
   * queue which are initially allocated to the warriors in reverse
   * order. i.e. if the are N warriors w1, w2, ..., wN, the slice for
   * wN is 0..P-1, w{N-1} has P..2P-1, until w1 has (N-1)P..NP-1.
   *
   * The core address of the instruction is fetched from the head of
   * the process queue and processes are pushed to the tail, so the
   * individual slices slide along at one location per executed
   * instruction.  The extra '+1' in the buffer size is to have free
   * space to slide the slices along.
   *
   * For two warriors w1, w2:
   *
   * |\......../|\......../| |
   * | w2 queue | w1 queue | |
   * 0          P         2P 2P+1
   */

  insn_t *core;
  field_t *queue_start, *queue_end; /* queue mem. start, end */

  /*
   * Cache Registers.
   *
   * The '94 draft specifies that the redcode processor model be
   * 'in-register'.  That is, the current instruction and the
   * instructions at the effective addresses (ea's) be cached in
   * registers during instruction execution, rather than have
   * core memory accessed directly when the operands are needed.  This
   * causes differences from the 'in-memory' model.  e.g. MOV 0,>0
   * doesn't change the instruction's b-field since the instruction at
   * the a-field's effective address (i.e. the instruction itself) was
   * cached before the post-increment happened.
   *
   * There are conceptually three registers: IN, A, and B.  IN is the
   * current instruction, and A, B are the ones at the a- and
   * b-fields' effective addresses respectively.
   *
   * We don't actually cache the complete instructions, but rather
   * only the *values* of their a- and b-field.  This is because
   * currently there is no way effective address computations can
   * modify the opcode, modifier, or addressing modes of an
   * instruction.
   */

  u32_t in;			/* 'in' field of current insn for decoding */
  u32_t ra_a, ra_b,		/* A register values */
        rb_a, rb_b;		/* B register values */
  u32_t da, db;			/* effective address of instruction's
				 * a- and b-fields */
  /*
   * alias some register names to real variables
   */
#define in_a ra_a
#define in_b rb_b

  field_t *pofs;
  insn_t *pt;
  unsigned int mode;

  /*
   * misc.
   */
  w_t *w;			/* current warrior */
  u32_t ip;			/* current instruction pointer */
  u32_t ftmp, t;		/* temps */
  unsigned int coresize, coresize1; /* size of core, size of core - 1 */
  int cycles;			/* instructions to execute before tie counter*/
  int alive_cnt;
  pspace_t **pspaces_;
  u32_t pspacesize;

#if DEBUG >= 1
  insn_t insn;			/* used for disassembly */
  char debug_line[256];		/* ditto */
#endif


  /*
   * Setup queue and core pointers
   */
  alive_cnt = nwar;
  cycles = nwar * Cycles; /* set instruction executions until tie counter */
  coresize = Coresize;
  coresize1 = Coresize-1;
  core = Core_Mem;
  queue_start = Queue_Mem;
  queue_end = Queue_Mem + NWarriors*Processes+1;
  pspaces_ = PSpaces;
  pspacesize = PSpace_size;

  /* Setup War_Tab and links all around.  Warriors are laid out in
   * reverse order so that warrior 0 executes first (it sits at the
   * end of the alive-ring walk order). */
  pofs = queue_end-1;		/* init. wars[] table */
  War_Tab[0].succ = &War_Tab[nwar-1];
  War_Tab[nwar-1].pred = &War_Tab[0];
  t = nwar-1;
  ftmp = 0;
  do {
    if ( t > 0 ) War_Tab[t].succ = &War_Tab[t-1];
    if ( t < nwar-1 ) War_Tab[t].pred = &War_Tab[t+1];
    pofs -= Processes;
    *pofs = war_pos_tab[ftmp];
    War_Tab[t].head = pofs;
    War_Tab[t].tail = pofs+1;
    War_Tab[t].nprocs = 1;
    War_Tab[t].id = ftmp;
    --t;
    ftmp++;
  } while ( ftmp < nwar );


  /*
   * Main loop is run for each executed instruction
   */
  w = &War_Tab[ nwar-1 ];
  do {

    ip = *w->head++;		/* load current instruction */
    if ( w->head == queue_end ) w->head = queue_start;
    in = core[ ip ].in;		/* note: flags must be unset! */
#if !SIM_STRIP_FLAGS
    in = in & iMASK;		/* strip flags. */
#endif
    in_a = core[ ip ].a;
    in_b = core[ ip ].b;

#if DEBUG >= 1
    insn = core[ip];
    dis1( debug_line, insn, coresize);
#endif

    /*
     * a-field effective address calculation.
     *
     * This bit was assimilated from Ken Espiritu's
     * pmars 0.8.6 patch.  Ditto for the b-field
     * calculation.
     */
    mode = in & mMASK;
    in >>= mBITS;		/* shift out a-mode bits */
    rb_a = in_a;		/* necessary if B-mode is immediate */

    if ( mode != IMMEDIATE ) {
      ADDMOD( da, in_a, ip );
      pt = core+da;		/* pt is where field points to (without */
				/* indirection). */

      if ( mode != DIRECT ) {
	if ( INDIR_A(mode) ) {
	  mode = RAW_MODE(mode);
	  pofs = &(pt->a);
	} else
	  pofs = &(pt->b);

	t = *pofs;		/* pofs is the indirection offset */
	if (mode == PREDEC) {
	  DECMOD(t);
	  *pofs = t;		/* pre-decrement writes back before use */
	}
	ADDMOD(da, t, da);

	pt = core+da;		/* now pt is the final destination */
				/* of indirection. */
	ra_a = pt->a;		/* read in registers */
	ra_b = pt->b;

	if (mode == POSTINC) {
	  INCMOD(t);
	  *pofs = t;		/* post-increment after the registers
				 * were cached -- '94 in-register model */
	}
      } else /* DIRECT */{
	ra_a = pt->a;
	ra_b = pt->b;
      }
    } else /* IMMEDIATE */ {
      ra_b = in_b;
      da = ip;
    }


    /*
     * b-field effective address calculation
     */
    mode = in & mMASK;
    in >>= mBITS;		/* shift out b-mode bits */

    if ( mode != IMMEDIATE ) {
      ADDMOD( db, in_b, ip );
      pt = core+db;

      if ( mode != DIRECT ) {
	if ( INDIR_A(mode) ) {
	  mode = RAW_MODE(mode);
	  pofs = &(pt->a);
	} else
	  pofs = &(pt->b);

	t = *pofs;
	if (mode == PREDEC) {
	  DECMOD(t);
	  *pofs = t;
	}
	ADDMOD(db, t, db);

	pt = core+db;
	rb_a = pt->a;
	rb_b = pt->b;

	if (mode == POSTINC) {
	  INCMOD(t);
	  *pofs = t;
	}
      } else /* DIRECT */{
	rb_a = pt->a;
	rb_b = pt->b;
      }
    } else /* IMMEDIATE */ {
      db = ip;
    }

#if DEBUG == 2
    /* Debug output */
    printf("%6d %4ld  %s  |%4ld, d %4ld,%4ld a %4ld,%4ld b %4ld,%4ld\n",
	   cycles, ip, debug_line,
	   w->nprocs, da, db,
	   ra_a, ra_b, rb_a, rb_b );
#endif

    if(step_cb) { // exec_trail support
        step_cb(in);
    }

    /*
     * Execute the instruction on opcode.modifier.  After each case,
     * 'break' falls through to re-queue ip+1; 'goto noqueue' skips
     * re-queueing (process died or jumped); 'goto die' kills the
     * current process.
     */
    switch ( in ) {

    case _OP(DAT, mA):
    case _OP(DAT, mB):
    case _OP(DAT, mAB):
    case _OP(DAT, mBA):
    case _OP(DAT, mX):
    case _OP(DAT, mF):
    case _OP(DAT, mI):
      /* die: also reached from DIV/MOD by zero. */
    die:
      if ( --w->nprocs > 0 )
	goto noqueue;
      /* warrior is dead: unlink it from the alive-ring */
      w->pred->succ = w->succ;
      w->succ->pred = w->pred;
      *death_tab++ = w->id;
      cycles = cycles - cycles/alive_cnt; /* nC+k -> (n-1)C+k */
      if ( --alive_cnt <= 1 )
	goto out;
      goto noqueue;


    case _OP(SPL, mA):
    case _OP(SPL, mB):
    case _OP(SPL, mAB):
    case _OP(SPL, mBA):
    case _OP(SPL, mX):
    case _OP(SPL, mF):
    case _OP(SPL, mI):
      INCMOD(ip);
      queue(ip);
      if ( w->nprocs < Processes ) {
	++w->nprocs;
	queue(da);
      }
      goto noqueue;


    case _OP(MOV, mA):
      core[db].a = ra_a;
      break;
    case _OP(MOV, mF):
      core[db].a = ra_a;
      /* fallthrough */
    case _OP(MOV, mB):
      core[db].b = ra_b;
      break;
    case _OP(MOV, mAB):
      core[db].b = ra_a;
      break;
    case _OP(MOV, mX):
      core[db].b = ra_a;
      /* fallthrough */
    case _OP(MOV, mBA):
      core[db].a = ra_b;
      break;
    case _OP(MOV, mI):
      core[db].a = ra_a;
      core[db].b = ra_b;
      core[db].in = core[da].in;
      break;


    case _OP(DJN,mBA):
    case _OP(DJN,mA):
      t = core[db].a;
      DECMOD(t);
      core[db].a = t;
      if ( rb_a == 1 ) break;	/* cached value decides the jump */
      queue(da);
      goto noqueue;

    case _OP(DJN,mAB):
    case _OP(DJN,mB):
      t = core[db].b;
      DECMOD(t);
      core[db].b = t;
      if ( rb_b == 1 ) break;
      queue(da);
      goto noqueue;

    case _OP(DJN,mX):
    case _OP(DJN,mI):
    case _OP(DJN,mF):
      t = core[db].a;      DECMOD(t);      core[db].a = t;
      t = core[db].b;      DECMOD(t);      core[db].b = t;
      if ( rb_a == 1 && rb_b == 1 ) break;
      queue(da);
      goto noqueue;


    case _OP(ADD, mI):
    case _OP(ADD, mF):
      ADDMOD( t, ra_b, rb_b ); core[db].b = t;
      /* fallthrough */
    case _OP(ADD, mA):
      ADDMOD( t, ra_a, rb_a ); core[db].a = t;
      break;
    case _OP(ADD, mB):
      ADDMOD( t, ra_b, rb_b ); core[db].b = t;
      break;
    case _OP(ADD, mX):
      ADDMOD( t, ra_b, rb_a ); core[db].a = t;
      /* fallthrough */
    case _OP(ADD, mAB):
      ADDMOD( t, ra_a, rb_b ); core[db].b = t;
      break;
    case _OP(ADD, mBA):
      ADDMOD( t, ra_b, rb_a ); core[db].a = t;
      break;


    case _OP(JMZ, mBA):
    case _OP(JMZ, mA):
      if ( rb_a )
	break;
      queue(da);
      goto noqueue;

    case _OP(JMZ, mAB):
    case _OP(JMZ, mB):
      if ( rb_b )
	break;
      queue(da);
      goto noqueue;

    case _OP(JMZ, mX):
    case _OP(JMZ, mF):
    case _OP(JMZ, mI):
      if ( rb_a || rb_b )
	break;
      queue(da);
      goto noqueue;


    case _OP(SUB, mI):
    case _OP(SUB, mF):
      SUBMOD( t, rb_b, ra_b ); core[db].b = t;
      /* fallthrough */
    case _OP(SUB, mA):
      SUBMOD( t, rb_a, ra_a); core[db].a = t;
      break;
    case _OP(SUB, mB):
      SUBMOD( t, rb_b, ra_b ); core[db].b = t;
      break;
    case _OP(SUB, mX):
      SUBMOD( t, rb_a, ra_b ); core[db].a = t;
      /* fallthrough */
    case _OP(SUB, mAB):
      SUBMOD( t, rb_b, ra_a ); core[db].b = t;
      break;
    case _OP(SUB, mBA):
      SUBMOD( t, rb_a, ra_b ); core[db].a = t;
      break;


    case _OP(SEQ, mA):
      if ( ra_a == rb_a )
	INCMOD(ip);
      break;
    case _OP(SEQ, mB):
      if ( ra_b == rb_b )
	INCMOD(ip);
      break;
    case _OP(SEQ, mAB):
      if ( ra_a == rb_b )
	INCMOD(ip);
      break;
    case _OP(SEQ, mBA):
      if ( ra_b == rb_a )
	INCMOD(ip);
      break;

    case _OP(SEQ, mI):
#if !SIM_STRIP_FLAGS
#define mask  (1<<(flPOS)-1)
      /* NOTE(review): this non-default branch looks broken: it uses
       * 'bitmask', which is never defined (the macro above is named
       * 'mask'), and '&' binds looser than '!=' so the expression
       * would also need parentheses.  Not compiled by default since
       * SIM_STRIP_FLAGS is 1; confirm before enabling. */
      if ( core[da].in & bitmask != core[db].in & bitmask )
	break;
#else
      if ( core[da].in != core[db].in )
	break;
#endif
      /* fallthrough */
    case _OP(SEQ, mF):
      if ( ra_a == rb_a && ra_b == rb_b )
	INCMOD(ip);
      break;
    case _OP(SEQ, mX):
      if ( ra_a == rb_b && ra_b == rb_a )
	INCMOD(ip);
      break;


    case _OP(SNE, mA):
      if ( ra_a != rb_a )
	INCMOD(ip);
      break;
    case _OP(SNE, mB):
      if ( ra_b != rb_b )
	INCMOD(ip);
      break;
    case _OP(SNE, mAB):
      if ( ra_a != rb_b )
	INCMOD(ip);
      break;
    case _OP(SNE, mBA):
      if ( ra_b != rb_a )
	INCMOD(ip);
      break;

    case _OP(SNE, mI):
      if ( core[da].in != core[db].in ) {
	INCMOD(ip);
	break;
      }
      /* fall through */
    case _OP(SNE, mF):
      if ( ra_a != rb_a || ra_b != rb_b )
	INCMOD(ip);
      break;
    case _OP(SNE, mX):
      if ( ra_a != rb_b || ra_b != rb_a )
	INCMOD(ip);
      break;


    case _OP(JMN, mBA):
    case _OP(JMN, mA):
      if (! rb_a )
	break;
      queue(da);
      goto noqueue;

    case _OP(JMN, mAB):
    case _OP(JMN, mB):
      if (! rb_b )
	break;
      queue(da);
      goto noqueue;

    case _OP(JMN, mX):
    case _OP(JMN, mF):
    case _OP(JMN, mI):
      if (!rb_a && !rb_b)
	break;
      queue(da);
      goto noqueue;


    case _OP(JMP, mA):
    case _OP(JMP, mB):
    case _OP(JMP, mAB):
    case _OP(JMP, mBA):
    case _OP(JMP, mX):
    case _OP(JMP, mF):
    case _OP(JMP, mI):
      queue(da);
      goto noqueue;



    case _OP(SLT, mA):
      if (ra_a < rb_a)
	INCMOD(ip);
      break;
    case _OP(SLT, mAB):
      if (ra_a < rb_b)
	INCMOD(ip);
      break;
    case _OP(SLT, mB):
      if (ra_b < rb_b)
	INCMOD(ip);
      break;
    case _OP(SLT, mBA):
      if (ra_b < rb_a)
	INCMOD(ip);
      break;
    case _OP(SLT, mI):
    case _OP(SLT, mF):
      if (ra_a < rb_a && ra_b < rb_b)
	INCMOD(ip);
      break;
    case _OP(SLT, mX):
      if (ra_a < rb_b && ra_b < rb_a)
	INCMOD(ip);
      break;


    case _OP(MODM, mI):
    case _OP(MODM, mF):
      ftmp = 0;			/* we must try to do both modulos even if
				   one fails */
      if ( ra_a ) core[db].a = rb_a % ra_a; else ftmp = 1;
      if ( ra_b ) core[db].b = rb_b % ra_b; else ftmp = 1;
      if ( ftmp ) goto die;	/* modulo by zero kills the process */
      break;
    case _OP(MODM, mX):
      ftmp = 0;			/* we must try to do both modulos even if
				   one fails */
      if ( ra_b ) core[db].a = rb_a%ra_b; else ftmp = 1;
      if ( ra_a ) core[db].b = rb_b%ra_a; else ftmp = 1;
      if ( ftmp ) goto die;
      break;
    case _OP(MODM, mA):
      if ( !ra_a ) goto die;
      core[db].a = rb_a % ra_a;
      break;
    case _OP(MODM, mB):
      if ( !ra_b ) goto die;
      core[db].b = rb_b % ra_b;
      break;
    case _OP(MODM, mAB):
      if ( !ra_a ) goto die;
      core[db].b = rb_b % ra_a;
      break;
    case _OP(MODM, mBA):
      if ( !ra_b ) goto die;
      core[db].a = rb_a % ra_b;
      break;


    case _OP(MUL, mI):
    case _OP(MUL, mF):
      core[db].b = (rb_b * ra_b) % coresize;
      /* fallthrough */
    case _OP(MUL, mA):
      core[db].a = (rb_a * ra_a) % coresize;
      break;
    case _OP(MUL, mB):
      core[db].b = (rb_b * ra_b) % coresize;
      break;
    case _OP(MUL, mX):
      core[db].a = (rb_a * ra_b) % coresize;
      /* fallthrough */
    case _OP(MUL, mAB):
      core[db].b = (rb_b * ra_a) % coresize;
      break;
    case _OP(MUL, mBA):
      core[db].a = (rb_a * ra_b) % coresize;
      break;


    case _OP(DIV, mI):
    case _OP(DIV, mF):
      ftmp = 0;			/* we must try to do both divisions even if
				   one fails */
      if ( ra_a ) core[db].a = rb_a / ra_a; else ftmp = 1;
      if ( ra_b ) core[db].b = rb_b / ra_b; else ftmp = 1;
      if ( ftmp ) goto die;	/* division by zero kills the process */
      break;
    case _OP(DIV, mX):
      ftmp = 0;			/* we must try to do both divisions even if
				   one fails */
      if ( ra_b ) core[db].a = rb_a / ra_b; else ftmp = 1;
      if ( ra_a ) core[db].b = rb_b / ra_a; else ftmp = 1;
      if ( ftmp ) goto die;
      break;
    case _OP(DIV, mA):
      if ( !ra_a ) goto die;
      core[db].a = rb_a / ra_a;
      break;
    case _OP(DIV, mB):
      if ( !ra_b ) goto die;
      core[db].b = rb_b / ra_b;
      break;
    case _OP(DIV, mAB):
      if ( !ra_a ) goto die;
      core[db].b = rb_b / ra_a;
      break;
    case _OP(DIV, mBA):
      if ( !ra_b ) goto die;
      core[db].a = rb_a / ra_b;
      break;


    case _OP(NOP,mI):
    case _OP(NOP,mX):
    case _OP(NOP,mF):
    case _OP(NOP,mA):
    case _OP(NOP,mAB):
    case _OP(NOP,mB):
    case _OP(NOP,mBA):
      break;

    case _OP(LDP,mA):
      ftmp = ra_a % pspacesize;
      core[db].a = UNSAFE_PSPACE_GET(w->id, ftmp);
      break;
    case _OP(LDP,mAB):
      ftmp = ra_a % pspacesize;
      core[db].b = UNSAFE_PSPACE_GET(w->id, ftmp);
      break;
    case _OP(LDP,mBA):
      ftmp = ra_b % pspacesize;
      core[db].a = UNSAFE_PSPACE_GET(w->id, ftmp);
      break;
    case _OP(LDP,mF):
    case _OP(LDP,mX):
    case _OP(LDP,mI):
    case _OP(LDP,mB):
      ftmp = ra_b % pspacesize;
      core[db].b = UNSAFE_PSPACE_GET(w->id, ftmp);
      break;

    case _OP(STP,mA):
      ftmp = rb_a % pspacesize;
      UNSAFE_PSPACE_SET(w->id, ftmp, ra_a);
      break;
    case _OP(STP,mAB):
      ftmp = rb_b % pspacesize;
      UNSAFE_PSPACE_SET(w->id, ftmp, ra_a);
      break;
    case _OP(STP,mBA):
      ftmp = rb_a % pspacesize;
      UNSAFE_PSPACE_SET(w->id, ftmp, ra_b);
      break;
    case _OP(STP,mF):
    case _OP(STP,mX):
    case _OP(STP,mI):
    case _OP(STP,mB):
      ftmp = rb_b % pspacesize;
      UNSAFE_PSPACE_SET(w->id, ftmp, ra_b);
      break;

#if DEBUG > 0
    default:
      alive_cnt = -1;		/* unknown opcode: simulator panic */
      goto out;
#endif
    }

    INCMOD(ip);
    queue(ip);			/* re-queue the next instruction */
  noqueue:
    w = w->succ;		/* round-robin to the next alive warrior */
  } while(--cycles>0);

 out:
#if DEBUG == 1
  printf("cycles: %d\n", cycles);
#endif
  return alive_cnt;
}
1213