1 /* asm.c: primitive redcode assembler
2  * $Id: asm.c,v 1.8 2002/10/01 22:24:46 rowan Exp $
3  */
4 
5 /* This file is part of `exhaust', a memory array redcode simulator.
6  * Author: M Joonas Pihlaja
7  * Public Domain.
8  */
9 
10 /* The format of lines with instructions should be:
11  *
12  * [START]	OPCODE.MODIFIER   A-MODE INT , B-MODE INT
13  *
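 * `ORG <integer>' sets the start offset and `PIN <integer>' sets the
 * warrior's pin; `ORG START' is ignored (the START label itself marks
 * the entry point), as is anything following an optional END.  The
 * only label recognised is START.  No fuss over the amount of white
 * space, as long as it exists where required.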
17  *
18  * Comments are recognised and discarded as is any line starting
19  * with "Program".  The output from `pmars -r 0 Your_Real_Source.red'
20  * should assemble fine with this tiny assembler.
21  *
22  *
23  * Functions in this file:
24  *
25  *     asm_line(), asm_file(), asm_fname(), dis1(),
26  *     discore()
27  */
28 #include <stdio.h>
29 #ifdef SYSV
30 #include <strings.h>
31 #else
32 #include <string.h>
33 #endif
34 #include <stdlib.h>
35 #include <ctype.h>
36 
37 #include "exhaust.h"
38 #include "insn.h"
39 #include "asm.h"
40 
/* str_toks_t: container for tokens we identify.
 *
 * Pairs the spelling of a multicharacter token with the token code
 * reported for it.  The name is only ever read (the table entries are
 * string literals), hence the const qualifier.
 */
typedef struct str_toks_st {
  const char *s;		/* name of token; NULL marks the table sentinel */
  int c;			/* token code */
} str_toks_t;
47 
/* Data
 *
 * MAX_ALL_CHARS: size in bytes of tok_buf[] and of the line buffers
 *                declared in asm_file() and discore()
 *
 * tok_buf[]: globally used to keep the contents of string tokens;
 *            get_tok() also stores single-character tokens here as a
 *            one-character string
 * tok_int:   if the token was a TOK_INT, the value of the token is here
 *
 * str_toks[]: table of multicharacter tokens we identify
 *
 */

#define MAX_ALL_CHARS 256
static char tok_buf[MAX_ALL_CHARS];
static int tok_int;
60 
61 static str_toks_t str_toks[] = {
62     { "DAT", TOK_DAT },		/* opcodes */
63     { "SPL", TOK_SPL },
64     { "MOV", TOK_MOV },
65     { "DJN", TOK_DJN },
66     { "ADD", TOK_ADD },
67     { "JMZ", TOK_JMZ },
68     { "SUB", TOK_SUB },
69     { "MOD", TOK_MOD },
70     { "CMP", TOK_SEQ },
71     { "SEQ", TOK_SEQ },
72     { "JMP", TOK_JMP },
73     { "JMN", TOK_JMN },
74     { "SNE", TOK_SNE },
75     { "MUL", TOK_MUL },
76     { "DIV", TOK_DIV },
77     { "SLT", TOK_SLT },
78     { "NOP", TOK_NOP },
79     { "LDP", TOK_LDP },
80     { "STP", TOK_STP },
81 
82     { "ORG", TOK_ORG },		/* pseudo-ops */
83     { "END", TOK_END },
84     { "PIN", TOK_PIN },
85     { "START", TOK_START },
86 
87     { "F", TOK_mF },		/* modifiers */
88     { "A", TOK_mA },
89     { "B", TOK_mB },
90     { "AB", TOK_mAB },
91     { "BA", TOK_mBA },
92     { "X", TOK_mX },
93     { "I", TOK_mI },
94     { NULL, 0 }			/* sentinel */
95 };
96 
97 
98 
99 /* NAME
100  *     get_tok -- read the next token from a string
101  *
102  * SYNOPSIS
103  *     const char *get_tok( const char *s, int *tok );
104  *
105  * INPUTS
106  *     s -- string to read token from
107  *     tok -- where we store the token code of the read token
108  *
109  * RESULTS
110  *     The token code of the read token is stored into *tok,
111  *     with 0 signifying end of input.
112  *
 *     If the token was an integer, its value is stored into
 *     the global `tok_int'.  Integers may be decimal, octal
 *     (leading 0) or hexadecimal (leading 0x), as accepted by
 *     strtol() with base 0.
 *
 *     String tokens are converted to upper case when storing
 *     them into the global `tok_buf[]'.  They are truncated
 *     at 255 characters.
120  *
121  * RETURN VALUE
122  *      Pointer to the character past the read token, or
123  *	to the nul character if at end of input.
124  *
125  * GLOBALS
126  *     tok_buf[]    -- a string or char token is copied here
127  *     tok_int      -- the value of an integer token
128  *     str_toks[]   -- used to identify string tokens
129  */
130 
/* skip_white(): returns ptr. to next non-whitespace char in s
 *
 * The scan stops at the terminating nul at the latest, since nul is
 * not whitespace.  Each character is converted to unsigned char before
 * it is handed to isspace(): passing a negative plain-char value (any
 * byte >= 0x80 where char is signed) to a <ctype.h> function is
 * undefined behaviour.
 */
static
const char *
skip_white(const char *s)
{
  while ( isspace((unsigned char)*s) ) s++;
  return s;
}
139 
140 static
141 const char *
get_tok(const char * s,int * tok)142 get_tok( const char *s, int *tok )
143 {
144   char *tok_str = tok_buf;
145   int i;
146 
147   s = skip_white(s);
148   if ( *s == 0 )    return (*tok = 0, s);
149 
150   /*
151    * Tokenize strings.
152    *
153    * String tokens must start with a letter and consist of
154    * letters, digits, and underscores.  Strings are
155    * converted to upper case.
156    */
157   tok_buf[1] = tok_buf[0] = 0;
158 
159   i = 0;
160   if ( isalpha(*s) )
161     while ( (isalnum(*s) || *s == '_') && ++i < MAX_ALL_CHARS )
162       *tok_str++ = toupper(*s++);
163   *tok_str = 0;
164 
165   if ( tok_str > tok_buf ) {
166     /*
167      * was a string token -- identify it by searching through
168      * the str_toks[] array.
169      */
170     for ( i = 0; str_toks[i].s ; i++ ) {
171       if ( 0 == strcmp( str_toks[i].s, tok_buf ) ) {
172 	*tok = str_toks[i].c;
173 	return s;
174       }
175     }
176     *tok = TOK_STR;		/* normal string, not special */
177     return s;
178   }
179 
180 
181   /*
182    * Tokenize ints.
183    * Must match /-?[0-9]/
184    */
185   if ( isdigit(*s) ||  ( *s == '-' && isdigit(*(s+1)) )) {
186     char *endptr;
187     tok_int = strtol( s, &endptr, 0 );
188     *tok = TOK_INT;
189     return endptr;
190   }
191 
192 
193   /*
194    * Tokenize addressing modes and pass single chars
195    */
196 
197   tok_buf[0] = *s;		/* store char value as single */
198   tok_buf[1] = 0;		/* char string. */
199 
200   switch ( *tok = *s++ ) {
201   case '$': *tok = TOK_DIRECT;          break;
202   case '#': *tok = TOK_IMMEDIATE;	break;
203   case '*': *tok = TOK_AINDIRECT;	break;
204   case '@': *tok = TOK_BINDIRECT;	break;
205   case '{': *tok = TOK_APREDEC;		break;
206   case '<': *tok = TOK_BPREDEC;		break;
207   case '}': *tok = TOK_APOSTINC;	break;
208   case '>': *tok = TOK_BPOSTINC;	break;
209   }
210 
211   return s;
212 }
213 
214 
215 
216 /* NAME
217  *     panic_bad_token -- issue an error message for a bad token and exit(1)
218  *
219  * SYNOPSIS
220  *     void panic_bad_token( int tok, const char *expected );
221  *
222  * INPUTS
223  *     tok -- token code of unexpected token
224  *     expected -- a string describing what kind of token
225  *		   was expected.  e.g. "a modifier".
226  *
227  * RESULTS
228  *     A message Informing the user of the unexpected token,
229  *     its possible semantic value, and what type of token
230  *     was expected instead.
231  *
232  * GLOBALS
233  *     tok_buf, tok_int -- if the token has semantic value we look
234  *                         for it here.
235  * BUGS
236  *     The error message should be much better -- not even location
237  *     in the source is given here. *sigh*
238  */
239 static
240 void
panic_bad_token(int tok,const char * expected)241 panic_bad_token( int tok, const char *expected )
242 {
243   char *errstr = NULL;
244   char buf[30];
245 
246   memset(buf, 0, 30);
247 
248   /* make an errstr
249    */
250   if ( tok_buf[0] )
251     errstr = tok_buf;
252   if ( tok == TOK_INT ) {
253     sprintf(buf, "%d", tok_int );
254     errstr = buf;
255   }
256 
257   /* complain and exit with error code
258    */
259   fprintf(stderr, "token '%s' not %s\n", errstr, expected );
260   exit(1);
261 }
262 
263 
264 /* NAME
265  *     asm_line -- assemble a line to an instruction
266  *
267  * SYNOPSIS
268  *     int asm_line( const char *line, insn_t *in, unsigned int CORESIZE );
269  *
270  * INPUTS
271  *     line -- line to assemble
272  *     in   -- instruction to assemble into
273  *     CORESIZE -- size of core
274  *
275  * RESULTS
276  *     If there was anything to assemble, it is assembled into
277  *     `in'.  If there was a START label, the corresponding flag
278  *     is set in the instructions flags.  Incomplete or erroneous
279  *     input prompt a quick error message and exit(1).
280  *
281  *     If the 'ORG start-address' construct is encountered where
282  *     `start-address' is an integer, then the `in->a' field contains
283  *     the offset in instructions from the start of the warrior
284  *     where the warrior should start execution.
285  *
286  *     If 'PIN id' is encountered, where `id' is an integer, then the
287  *     `in->a' field contains the `id'.
288  *
289  * RETURN VALUE
290  *     ASMLINE_PIN  : pseudo-op 'PIN' encountered, id saved in `in->a'.
291  *     ASMLINE_ORG  : pseudo-op 'ORG' encountered, warrior start
292  *                    saved in `in->a'.
293  *     ASMLINE_DONE : done assembling, END opcode found, nothing assembled.
294  *     ASMLINE_NONE : nothing to assemble on this line.
295  *     ASMLINE_OK   : assembled instruction into `in' OK.
296  *
297  * GLOBALS
298  *     tok_int, tok_buf[], str_toks[] somewhere down the line.
299  */
300 
301 int
asm_line(const char * line,insn_t * in,unsigned int CORESIZE)302 asm_line( const char *line, insn_t *in, unsigned int CORESIZE  )
303 {
304   const char *s = line;
305   int tok;
306   int flags = 0;
307   int op, m, ma, mb;		/* opcode, modifier, a-mode, b-mode */
308 
309   s = get_tok( s, &tok );
310   if ( tok == 0 ) return ASMLINE_NONE;
311 
312   /*
313    * Ignore string lines '^Program.*' and comments.
314    */
315   if ( tok == TOK_STR && 0 == strcmp( "PROGRAM", tok_buf ))
316   {
317     return ASMLINE_NONE;
318   }
319   if ( tok == ';' ) return ASMLINE_NONE;
320 
321   /*
322    * Now match the instruction's various components:
323    *   [START label,] opcode, modifier, a-mode, a-value, b-mode, b-value
324    */
325 
326   /* Match possible start label
327    */
328   if ( tok == TOK_START ) {
329     flags |= fl_START;
330     s = get_tok( s, &tok );
331   }
332 
333   /* Match opcode
334    */
335   if ( is_tok_pseudoop(tok) ) {
336     switch ( tok ) {
337     case TOK_END:
338       return ASMLINE_DONE;	/* signal done assembling */
339 
340     case TOK_ORG:
341       s = get_tok( s, &tok );	/* get the next token */
342 
343       if ( tok == TOK_START )	/* ignore: */
344 	return ASMLINE_NONE;	/* start label already matched and processed */
345 
346       if ( tok != TOK_INT ) {
347 	panic_bad_token( tok, "an integer -- an int or \"START\" "
348 			 "follows ORG" );
349       }
350       in->a = tok_int;
351       return ASMLINE_ORG;
352 
353     case TOK_PIN:
354       s = get_tok( s, &tok );
355       if ( tok != TOK_INT ) {
356 	panic_bad_token( tok, "an integer -- PIN must be an unsigned integer");
357       }
358       in->a = tok_int;
359       return ASMLINE_PIN;
360 
361     default:
362       panic_bad_token( tok, "a pseudo-op (internal assembler error)" );
363     }
364   }
365   if (!( is_tok_opcode(tok)))
366     panic_bad_token( tok, "an opcode" );
367 
368   op = DAT;
369   switch(tok) {
370   case TOK_DAT: op = DAT; break;
371   case TOK_SPL: op = SPL; break;
372   case TOK_MOV: op = MOV; break;
373   case TOK_JMP: op = JMP; break;
374   case TOK_JMZ: op = JMZ; break;
375   case TOK_JMN: op = JMN; break;
376   case TOK_ADD: op = ADD; break;
377   case TOK_SUB: op = SUB; break;
378   case TOK_SEQ: op = SEQ; break;
379   case TOK_SNE: op = SNE; break;
380   case TOK_MUL: op = MUL; break;
381   case TOK_DIV: op = DIV; break;
382   case TOK_DJN: op = DJN; break;
383   case TOK_SLT: op = SLT; break;
384   case TOK_MOD: op = MODM; break;
385   case TOK_NOP: op = NOP; break;
386   case TOK_LDP: op = LDP; break;
387   case TOK_STP: op = STP; break;
388   default:
389     panic_bad_token( tok, "an opcode" );
390   }
391 
392   /* Match modifier
393    */
394   s = get_tok( s, &tok );	/* first the '.' */
395   if ( tok != '.' )
396     panic_bad_token( tok, "'.'" );
397 
398   s = get_tok( s, &tok );	/* then the modifier itself */
399   if ( ! is_tok_modifier(tok) )
400     panic_bad_token( tok, "a modifier");
401   m = tok - TOK_mF;
402 
403   /* Match a-field addressing mode and a-field
404    */
405   s = get_tok( s, &tok );
406   if ( ! is_tok_mode(tok) )
407     panic_bad_token( tok, "an addressing mode specifier");
408   ma = tok - TOK_DIRECT;
409 
410   s = get_tok( s, &tok );
411   if ( tok != TOK_INT )
412     panic_bad_token( tok, "an integer");
413   in->a = MODS(tok_int,CORESIZE);
414 
415   /* Match comma
416    */
417   s = get_tok( s, &tok );
418   if ( tok != ',' )
419     panic_bad_token( tok, "','" );
420 
421   /* Match b-field addressing mode and a-field
422    */
423   s = get_tok( s, &tok );
424   if ( ! is_tok_mode(tok) )
425     panic_bad_token( tok, "an addressing mode specifier");
426   mb = tok - TOK_DIRECT;
427 
428   s = get_tok( s, &tok );
429   if ( tok != TOK_INT )
430     panic_bad_token( tok, "an integer");
431   in->b = MODS(tok_int,CORESIZE);
432 
433 
434   /*
435    * Set flags and ignore the rest of the line
436    */
437   in->in = (flags << flPOS) | OP( op, m, ma, mb );
438   return ASMLINE_OK;
439 }
440 
441 
442 
443 
444 /* NAME
445  *     asm_file, asm_fname -- assemble a FILE into a warrior
446  *
447  * SYNOPSIS
448  *     void asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE );
449  *     void asm_fname( const char *filename, warrior_t *w,
450  *    	               unsigned int CORESIZE );
451  *
452  * INPUTS
453  *     w        -- warrior_t to assemble into.
454  *     F        -- stream to read warrior source from
455  *     filename -- path to source file.  May be '-'
456  *		   which is interpreted as stdin.
457  *     CORESIZE -- just that
458  *
459  * DESCRIPTION
460  *     These functions assemble a source file into a
461  *     warrior_t setting all the non-info fields.
462  *
463  * RESULTS
464  *    If the warrior assembled correctly, then warrior_t
465  *    contains its code and starting offset.  If an error
466  *    occured during assembly, an error message is issued
467  *    and the program exit()s.
468  *
469  * GLOBALS
470  *     none as such, subroutines use tok_buf[], tok_int, str_toks[],
471  *     MAXLENGTH constant
472  *
473  * SEE ALSO
474  *     asm_line()
475  *
476  * BUGS
477  *     Its not really acceptable to exit() on an assembly error.
478  */
479 void
asm_file(FILE * F,warrior_t * w,unsigned int CORESIZE)480 asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE )
481 {
482   char line[MAX_ALL_CHARS];
483   insn_t *c;
484   int ret;			/* return code from asm_line() */
485 
486   w->len = w->start = 0;
487   w->have_pin = 0;
488   w->pin = 0;
489   c = w->code;
490 
491   while ( fgets(line, MAX_ALL_CHARS, F) ) {
492     ret = asm_line( line, c, CORESIZE );
493     if ( ret == ASMLINE_DONE ) break;
494 
495     switch ( ret ) {
496     case ASMLINE_OK:
497       if ( get_flags( c->in ) & fl_START ) {
498 	w->start = w->len;
499 	clr_flags( c->in, fl_START );
500       }
501       if ( w->len < MAXLENGTH) c++;
502       w->len++;
503       break;
504 
505     case ASMLINE_ORG:
506       w->start =  c->a;		/* was `ORG int', get the starting address */
507       break;
508 
509     case ASMLINE_NONE:
510       break;			/* nop */
511 
512     case ASMLINE_PIN:
513       w->have_pin = 1;
514       w->pin = c->a;		/* save PIN. */
515       break;
516 
517     default:
518       fprintf(stderr,"asm.c/asm_file(): illegal return code from asm_line()\n");
519       exit(1);
520     }
521     if ( w->len > MAXLENGTH ) {
522       fprintf(stderr, "too many instructions in warrior %d\n", w->no);
523       exit(1);
524     }
525   }
526   if ( w->start >= w->len ) {
527     fprintf(stderr, "starting address must be inside warrior body\n" );
528     exit(1);
529   }
530 }
531 
532 
533 void
asm_fname(const char * fname,warrior_t * w,unsigned int CORESIZE)534 asm_fname( const char *fname, warrior_t *w, unsigned int CORESIZE )
535 {
536   FILE *F;
537   int is_stdin = 0;
538 
539   if ( strcmp( fname, "-" ) == 0 ) {
540     F = stdin;
541     is_stdin = 1;
542   }
543   else
544     if (!( F = fopen(fname, "r") )) {
545       fprintf(stderr, "can't open file %s\n", fname);
546       exit(1);
547     }
548 
549   asm_file(F, w, CORESIZE);
550 
551   if ( !is_stdin ) fclose(F);
552 }
553 
554 
555 
556 /* NAME
557  *     dis1 -- disasemble an instruction
558  *     discore -- disasemble a segment of core
559  *
560  * SYNOPSIS
561  *     void dis1( char *s, inst_t in, unsigned int CORESIZE );
562  *     void discore( inst_t *core, unsigned int start, unsigned int end,
563  * 		     unsigned int CORESIZE );
564  *
565  * INPUTS
566  *     s -- string to print disassembled instruction to. A string
567  *          of length 60 should be more than sufficient.
568  *     in -- instruction to disassemble
569  *     core -- pointer to start of core
570  *     start -- core segment start offset
571  *     end -- core segment end offset (excluded)
572  *
573  * RESULTS
574  *     dis1 -- The disassembled instruction is printed to `s'.
575  *     discore -- A segment of core is dissasembled and printed
576  *                to stdout with core addresses.
577  */
578 
579 void
dis1(char * buf,insn_t in,unsigned int CORESIZE)580 dis1(char *buf, insn_t in, unsigned int CORESIZE)
581 {
582   int x;
583   char *op_s, *mo_s, *ma_s, *mb_s;
584   int af, bf;
585 
586   x = (in.in >> opPOS) & opMASK;
587   switch( x ) {
588   case DAT: op_s = "dat"; break;
589   case SPL: op_s = "spl"; break;
590   case MOV: op_s = "mov"; break;
591   case JMP: op_s = "jmp"; break;
592   case JMZ: op_s = "jmz"; break;
593   case JMN: op_s = "jmn"; break;
594   case ADD: op_s = "add"; break;
595   case SUB: op_s = "sub"; break;
596   case SEQ: op_s = "seq"; break;
597   case SNE: op_s = "sne"; break;
598   case MUL: op_s = "mul"; break;
599   case DIV: op_s = "div"; break;
600   case DJN: op_s = "djn"; break;
601   case SLT: op_s = "slt"; break;
602   case MODM: op_s = "mod"; break;
603   case NOP: op_s = "nop"; break;
604   case LDP: op_s = "ldp"; break;
605   case STP: op_s = "stp"; break;
606   default:
607     op_s = "???";
608   }
609 
610   x = (in.in >> moPOS) & moMASK;
611   switch ( x ) {
612   case mF:  mo_s = "f "; break;
613   case mA:  mo_s = "a "; break;
614   case mB:  mo_s = "b "; break;
615   case mAB: mo_s = "ab"; break;
616   case mBA: mo_s = "ba"; break;
617   case mX:  mo_s = "x "; break;
618   case mI:  mo_s = "i "; break;
619   default:
620     mo_s = "?";
621   }
622 
623 
624   x = (in.in >> maPOS) & mMASK;
625   switch (x) {
626   case DIRECT: ma_s = "$"; break;
627   case IMMEDIATE: ma_s = "#"; break;
628   case AINDIRECT: ma_s = "*"; break;
629   case BINDIRECT: ma_s = "@"; break;
630   case APREDEC: ma_s = "{"; break;
631   case APOSTINC: ma_s = "}"; break;
632   case BPREDEC: ma_s = "<"; break;
633   case BPOSTINC: ma_s = ">"; break;
634   default: ma_s = "?";
635   }
636 
637   x = (in.in >> mbPOS) & mMASK;
638   switch (x) {
639   case DIRECT:    mb_s = "$"; break;
640   case IMMEDIATE: mb_s = "#"; break;
641   case AINDIRECT: mb_s = "*"; break;
642   case BINDIRECT: mb_s = "@"; break;
643   case APREDEC:   mb_s = "{"; break;
644   case APOSTINC:  mb_s = "}"; break;
645   case BPREDEC:   mb_s = "<"; break;
646   case BPOSTINC:  mb_s = ">"; break;
647   default: mb_s = "?";
648   }
649 
650   af = in.a <= CORESIZE/2 ? in.a : in.a - CORESIZE;
651   bf = in.b <= CORESIZE/2 ? in.b : in.b - CORESIZE;
652 
653   sprintf(buf,"%s.%s %s%5d , %s%5d", op_s, mo_s, ma_s, af, mb_s, bf);
654 }
655 
656 
657 void
discore(const insn_t * core,int start,int end,unsigned int CORESIZE)658 discore( const insn_t *core,
659 	 int start,
660 	 int end,
661 	 unsigned int CORESIZE )
662 {
663   int adr;
664   char line[MAX_ALL_CHARS];
665   for ( adr = start; adr < end; adr++ ) {
666     int i = MODS(adr, CORESIZE);
667     dis1( line, core[i], CORESIZE );
668     printf("%4d    %s\n", adr, line);
669   }
670 }
671