1 /* asm.c: primitive redcode assembler
2 * $Id: asm.c,v 1.1.1.1 2003/08/26 16:57:02 varfar Exp $
3 */
4
5 /* This file is part of `exhaust', a memory array redcode simulator.
6 * Author: M Joonas Pihlaja
7 * Public Domain.
8 */
9
10 /* The format of lines with instructions should be:
11 *
12 * [START] OPCODE.MODIFIER A-MODE INT , B-MODE INT
13 *
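 * `ORG START' is ignored (the START label on an instruction already marks
 * the entry point), `ORG <int>' sets the start offset and `PIN <int>' sets
 * the warrior's PIN. Assembly stops at END; anything after it (such as a
 * label) is ignored. The only label recognised is START. No fuss over
 * the amount of white space, as long as it exists where required.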
17 *
18 * Comments are recognised and discarded as is any line starting
19 * with "Program". The output from `pmars -r 0 Your_Real_Source.red'
20 * should assemble fine with this tiny assembler.
21 *
22 *
23 * Functions in this file:
24 *
25 * asm_line(), asm_file(), asm_fname(), dis1(),
26 * discore()
27 */
28 #include <stdio.h>
29 #ifdef SYSV
30 #include <strings.h>
31 #else
32 #include <string.h>
33 #endif
34 #include <stdlib.h>
35 #include <ctype.h>
36
37 #include "exhaust.h"
38 #include "insn.h"
39 #include "asm.h"
40
/* str_toks_t: one entry of the keyword table -- the upper-case spelling
 * of a multi-character token together with its token code.
 */
typedef struct str_toks_st {
    const char *s;    /* name of token; refers to a string literal, never written */
    int c;            /* token code */
} str_toks_t;
47
/* Data
 *
 * MAX_ALL_CHARS: size in bytes of tok_buf[] and of the line buffers
 *                declared in asm_file() and discore()
 * tok_buf[]: globally used to keep the contents of string tokens
 * tok_int: if the token was a TOK_INT, the value of the token is here
 *
 * str_toks[]: table of multicharacter tokens we identify
 *
 */

#define MAX_ALL_CHARS 256
static char tok_buf[MAX_ALL_CHARS];   /* text of the latest string or single-char token; emptied for integer tokens */
static int tok_int;                   /* value of the latest TOK_INT token */
60
61 const char
62 *MNEMONIC_OPCODE[OPCODE_LAST] = {
63 "DAT","SPL","MOV","DJN","ADD","JMZ","SUB","SEQ","SNE","SLT","JMN","JMP","NOP","MUL","MOD","DIV","LDP","STP" },
64 *MNEMONIC_MODIFIER[MODIFIER_LAST] = {
65 "F","A","B","AB","BA","X","I"},
66 MNEMONIC_ADDRMODE[ADDRMODE_LAST] = {
67 '$','#','@','<','>','*','{','}'};
68
69 static str_toks_t str_toks[] = {
70 { "DAT", TOK_DAT }, /* opcodes */
71 { "SPL", TOK_SPL },
72 { "MOV", TOK_MOV },
73 { "DJN", TOK_DJN },
74 { "ADD", TOK_ADD },
75 { "JMZ", TOK_JMZ },
76 { "SUB", TOK_SUB },
77 { "MOD", TOK_MOD },
78 { "CMP", TOK_SEQ },
79 { "SEQ", TOK_SEQ },
80 { "JMP", TOK_JMP },
81 { "JMN", TOK_JMN },
82 { "SNE", TOK_SNE },
83 { "MUL", TOK_MUL },
84 { "DIV", TOK_DIV },
85 { "SLT", TOK_SLT },
86 { "NOP", TOK_NOP },
87 { "LDP", TOK_LDP },
88 { "STP", TOK_STP },
89
90 { "ORG", TOK_ORG }, /* pseudo-ops */
91 { "END", TOK_END },
92 { "PIN", TOK_PIN },
93 { "START", TOK_START },
94
95 { "F", TOK_mF }, /* modifiers */
96 { "A", TOK_mA },
97 { "B", TOK_mB },
98 { "AB", TOK_mAB },
99 { "BA", TOK_mBA },
100 { "X", TOK_mX },
101 { "I", TOK_mI },
102 { NULL, 0 } /* sentinel */
103 };
104
105
106
107 /* NAME
108 * get_tok -- read the next token from a string
109 *
110 * SYNOPSIS
111 * const char *get_tok( const char *s, int *tok );
112 *
113 * INPUTS
114 * s -- string to read token from
115 * tok -- where we store the token code of the read token
116 *
117 * RESULTS
118 * The token code of the read token is stored into *tok,
119 * with 0 signifying end of input.
120 *
 * If the token was an integer, its value is stored into
 * the global `tok_int'. Integers may be decimal, octal (leading 0)
 * or hexadecimal (leading 0x), as parsed by strtol() with base 0.
 *
 * String tokens are converted to upper case when storing
 * them into the global `tok_buf[]'. They are truncated
 * at 255 characters.
128 *
129 * RETURN VALUE
130 * Pointer to the character past the read token, or
131 * to the nul character if at end of input.
132 *
133 * GLOBALS
134 * tok_buf[] -- a string or char token is copied here
135 * tok_int -- the value of an integer token
136 * str_toks[] -- used to identify string tokens
137 */
138
/* skip_white(): returns ptr. to the first non-whitespace char in s,
 * which is the terminating nul if s holds nothing but whitespace.
 */
static
const char *
skip_white(const char *s)
{
    /* The <ctype.h> classifiers are only defined for values representable
     * as unsigned char (or EOF); plain char may be negative for bytes
     * >= 0x80, so convert before classifying.
     */
    while (isspace((unsigned char)*s))
        s++;
    return s;
}
147
148 static
149 const char *
get_tok(const char * s,int * tok)150 get_tok( const char *s, int *tok )
151 {
152 char *tok_str = tok_buf;
153 int i;
154
155 s = skip_white(s);
156 if ( *s == 0 ) return (*tok = 0, s);
157
158 /*
159 * Tokenize strings.
160 *
161 * String tokens must start with a letter and consist of
162 * letters, digits, and underscores. Strings are
163 * converted to upper case.
164 */
165 tok_buf[1] = tok_buf[0] = 0;
166
167 i = 0;
168 if ( isalpha(*s) )
169 while ( (isalnum(*s) || *s == '_') && ++i < MAX_ALL_CHARS )
170 *tok_str++ = toupper(*s++);
171 *tok_str = 0;
172
173 if ( tok_str > tok_buf ) {
174 /*
175 * was a string token -- identify it by searching through
176 * the str_toks[] array.
177 */
178 for ( i = 0; str_toks[i].s ; i++ ) {
179 if ( 0 == strcmp( str_toks[i].s, tok_buf ) ) {
180 *tok = str_toks[i].c;
181 return s;
182 }
183 }
184 *tok = TOK_STR; /* normal string, not special */
185 return s;
186 }
187
188
189 /*
190 * Tokenize ints.
191 * Must match /-?[0-9]/
192 */
193 if ( isdigit(*s) || ( *s == '-' && isdigit(*(s+1)) )) {
194 char *endptr;
195 tok_int = strtol( s, &endptr, 0 );
196 *tok = TOK_INT;
197 return endptr;
198 }
199
200
201 /*
202 * Tokenize addressing modes and pass single chars
203 */
204
205 tok_buf[0] = *s; /* store char value as single */
206 tok_buf[1] = 0; /* char string. */
207
208 switch ( *tok = *s++ ) {
209 case '$': *tok = TOK_DIRECT; break;
210 case '#': *tok = TOK_IMMEDIATE; break;
211 case '*': *tok = TOK_AINDIRECT; break;
212 case '@': *tok = TOK_BINDIRECT; break;
213 case '{': *tok = TOK_APREDEC; break;
214 case '<': *tok = TOK_BPREDEC; break;
215 case '}': *tok = TOK_APOSTINC; break;
216 case '>': *tok = TOK_BPOSTINC; break;
217 }
218
219 return s;
220 }
221
222
223
224 /* NAME
225 * panic_bad_token -- issue an error message for a bad token and exit(1)
226 *
227 * SYNOPSIS
228 * void panic_bad_token( int tok, const char *expected );
229 *
230 * INPUTS
231 * tok -- token code of unexpected token
232 * expected -- a string describing what kind of token
233 * was expected. e.g. "a modifier".
234 *
235 * RESULTS
236 * A message Informing the user of the unexpected token,
237 * its possible semantic value, and what type of token
238 * was expected instead.
239 *
240 * GLOBALS
241 * tok_buf, tok_int -- if the token has semantic value we look
242 * for it here.
243 * BUGS
244 * The error message should be much better -- not even location
245 * in the source is given here. *sigh*
246 */
247 static
248 void
panic_bad_token(int tok,const char * expected)249 panic_bad_token( int tok, const char *expected )
250 {
251 char *errstr = NULL;
252 char buf[30];
253
254 memset(buf, 0, 30);
255
256 /* make an errstr
257 */
258 if ( tok_buf[0] )
259 errstr = tok_buf;
260 if ( tok == TOK_INT ) {
261 sprintf(buf, "%d", tok_int );
262 errstr = buf;
263 }
264
265 /* complain and exit with error code
266 */
267 fprintf(stderr, "token '%s' not %s\n", errstr, expected );
268 exit(1);
269 }
270
271
272 /* NAME
273 * asm_line -- assemble a line to an instruction
274 *
275 * SYNOPSIS
276 * int asm_line( const char *line, insn_t *in, unsigned int CORESIZE );
277 *
278 * INPUTS
279 * line -- line to assemble
280 * in -- instruction to assemble into
281 * CORESIZE -- size of core
282 *
283 * RESULTS
284 * If there was anything to assemble, it is assembled into
285 * `in'. If there was a START label, the corresponding flag
286 * is set in the instructions flags. Incomplete or erroneous
287 * input prompt a quick error message and exit(1).
288 *
289 * If the 'ORG start-address' construct is encountered where
290 * `start-address' is an integer, then the `in->a' field contains
291 * the offset in instructions from the start of the warrior
292 * where the warrior should start execution.
293 *
294 * If 'PIN id' is encountered, where `id' is an integer, then the
295 * `in->a' field contains the `id'.
296 *
297 * RETURN VALUE
298 * ASMLINE_PIN : pseudo-op 'PIN' encountered, id saved in `in->a'.
299 * ASMLINE_ORG : pseudo-op 'ORG' encountered, warrior start
300 * saved in `in->a'.
301 * ASMLINE_DONE : done assembling, END opcode found, nothing assembled.
302 * ASMLINE_NONE : nothing to assemble on this line.
303 * ASMLINE_OK : assembled instruction into `in' OK.
304 *
305 * GLOBALS
306 * tok_int, tok_buf[], str_toks[] somewhere down the line.
307 */
308
309 int
asm_line(const char * line,insn_t * in,unsigned int CORESIZE)310 asm_line( const char *line, insn_t *in, unsigned int CORESIZE )
311 {
312 const char *s = line;
313 int tok;
314 int flags = 0;
315 int op, m, ma, mb; /* opcode, modifier, a-mode, b-mode */
316
317 s = get_tok( s, &tok );
318 if ( tok == 0 ) return ASMLINE_NONE;
319
320 /*
321 * Ignore string lines '^Program.*' and comments.
322 */
323 if ( tok == TOK_STR && 0 == strcmp( "PROGRAM", tok_buf ))
324 {
325 return ASMLINE_NONE;
326 }
327 if ( tok == ';' ) return ASMLINE_NONE;
328
329 /*
330 * Now match the instruction's various components:
331 * [START label,] opcode, modifier, a-mode, a-value, b-mode, b-value
332 */
333
334 /* Match possible start label
335 */
336 if ( tok == TOK_START ) {
337 flags |= fl_START;
338 s = get_tok( s, &tok );
339 }
340
341 /* Match opcode
342 */
343 if ( is_tok_pseudoop(tok) ) {
344 switch ( tok ) {
345 case TOK_END:
346 return ASMLINE_DONE; /* signal done assembling */
347
348 case TOK_ORG:
349 s = get_tok( s, &tok ); /* get the next token */
350
351 if ( tok == TOK_START ) /* ignore: */
352 return ASMLINE_NONE; /* start label already matched and processed */
353
354 if ( tok != TOK_INT ) {
355 panic_bad_token( tok, "an integer -- an int or \"START\" "
356 "follows ORG" );
357 }
358 in->a = tok_int;
359 return ASMLINE_ORG;
360
361 case TOK_PIN:
362 s = get_tok( s, &tok );
363 if ( tok != TOK_INT ) {
364 panic_bad_token( tok, "an integer -- PIN must be an unsigned integer");
365 }
366 in->a = tok_int;
367 return ASMLINE_PIN;
368
369 default:
370 panic_bad_token( tok, "a pseudo-op (internal assembler error)" );
371 }
372 }
373 if (!( is_tok_opcode(tok)))
374 panic_bad_token( tok, "an opcode" );
375
376 op = DAT;
377 switch(tok) {
378 case TOK_DAT: op = DAT; break;
379 case TOK_SPL: op = SPL; break;
380 case TOK_MOV: op = MOV; break;
381 case TOK_JMP: op = JMP; break;
382 case TOK_JMZ: op = JMZ; break;
383 case TOK_JMN: op = JMN; break;
384 case TOK_ADD: op = ADD; break;
385 case TOK_SUB: op = SUB; break;
386 case TOK_SEQ: op = SEQ; break;
387 case TOK_SNE: op = SNE; break;
388 case TOK_MUL: op = MUL; break;
389 case TOK_DIV: op = DIV; break;
390 case TOK_DJN: op = DJN; break;
391 case TOK_SLT: op = SLT; break;
392 case TOK_MOD: op = MODM; break;
393 case TOK_NOP: op = NOP; break;
394 case TOK_LDP: op = LDP; break;
395 case TOK_STP: op = STP; break;
396 default:
397 panic_bad_token( tok, "an opcode" );
398 }
399
400 /* Match modifier
401 */
402 s = get_tok( s, &tok ); /* first the '.' */
403 if ( tok != '.' )
404 panic_bad_token( tok, "'.'" );
405
406 s = get_tok( s, &tok ); /* then the modifier itself */
407 if ( ! is_tok_modifier(tok) )
408 panic_bad_token( tok, "a modifier");
409 m = tok - TOK_mF;
410
411 /* Match a-field addressing mode and a-field
412 */
413 s = get_tok( s, &tok );
414 if ( ! is_tok_mode(tok) )
415 panic_bad_token( tok, "an addressing mode specifier");
416 ma = tok - TOK_DIRECT;
417
418 s = get_tok( s, &tok );
419 if ( tok != TOK_INT )
420 panic_bad_token( tok, "an integer");
421 in->a = MODS(tok_int,CORESIZE);
422
423 /* Match comma
424 */
425 s = get_tok( s, &tok );
426 if ( tok != ',' )
427 panic_bad_token( tok, "','" );
428
429 /* Match b-field addressing mode and a-field
430 */
431 s = get_tok( s, &tok );
432 if ( ! is_tok_mode(tok) )
433 panic_bad_token( tok, "an addressing mode specifier");
434 mb = tok - TOK_DIRECT;
435
436 s = get_tok( s, &tok );
437 if ( tok != TOK_INT )
438 panic_bad_token( tok, "an integer");
439 in->b = MODS(tok_int,CORESIZE);
440
441
442 /*
443 * Set flags and ignore the rest of the line
444 */
445 in->in = (flags << flPOS) | OP( op, m, ma, mb );
446 return ASMLINE_OK;
447 }
448
449
450
451
452 /* NAME
453 * asm_file, asm_fname -- assemble a FILE into a warrior
454 *
455 * SYNOPSIS
456 * void asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE );
457 * void asm_fname( const char *filename, warrior_t *w,
458 * unsigned int CORESIZE );
459 *
460 * INPUTS
461 * w -- warrior_t to assemble into.
462 * F -- stream to read warrior source from
463 * filename -- path to source file. May be '-'
464 * which is interpreted as stdin.
465 * CORESIZE -- just that
466 *
467 * DESCRIPTION
468 * These functions assemble a source file into a
469 * warrior_t setting all the non-info fields.
470 *
471 * RESULTS
472 * If the warrior assembled correctly, then warrior_t
473 * contains its code and starting offset. If an error
474 * occured during assembly, an error message is issued
475 * and the program exit()s.
476 *
477 * GLOBALS
478 * none as such, subroutines use tok_buf[], tok_int, str_toks[],
479 * MAXLENGTH constant
480 *
481 * SEE ALSO
482 * asm_line()
483 *
484 * BUGS
485 * Its not really acceptable to exit() on an assembly error.
486 */
487 void
asm_file(FILE * F,warrior_t * w,unsigned int CORESIZE)488 asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE )
489 {
490 char line[MAX_ALL_CHARS];
491 insn_t *c;
492 int ret; /* return code from asm_line() */
493
494 w->len = w->start = 0;
495 w->have_pin = 0;
496 w->pin = 0;
497 c = w->code;
498
499 while ( fgets(line, MAX_ALL_CHARS, F) ) {
500 ret = asm_line( line, c, CORESIZE );
501 if ( ret == ASMLINE_DONE ) break;
502
503 switch ( ret ) {
504 case ASMLINE_OK:
505 if ( get_flags( c->in ) & fl_START ) {
506 w->start = w->len;
507 clr_flags( c->in, fl_START );
508 }
509 if ( w->len < MAXLENGTH) c++;
510 w->len++;
511 break;
512
513 case ASMLINE_ORG:
514 w->start = c->a; /* was `ORG int', get the starting address */
515 break;
516
517 case ASMLINE_NONE:
518 break; /* nop */
519
520 case ASMLINE_PIN:
521 w->have_pin = 1;
522 w->pin = c->a; /* save PIN. */
523 break;
524
525 default:
526 fprintf(stderr,"asm.c/asm_file(): illegal return code from asm_line()\n");
527 exit(1);
528 }
529 if ( w->len > MAXLENGTH ) {
530 fprintf(stderr, "too many instructions in warrior %d\n", w->no);
531 exit(1);
532 }
533 }
534 if ( w->start >= w->len ) {
535 fprintf(stderr, "starting address must be inside warrior body\n" );
536 exit(1);
537 }
538 }
539
540
541 void
asm_fname(const char * fname,warrior_t * w,unsigned int CORESIZE)542 asm_fname( const char *fname, warrior_t *w, unsigned int CORESIZE )
543 {
544 FILE *F;
545 int is_stdin = 0;
546
547 if ( strcmp( fname, "-" ) == 0 ) {
548 F = stdin;
549 is_stdin = 1;
550 }
551 else
552 if (!( F = fopen(fname, "r") )) {
553 fprintf(stderr, "can't open file %s\n", fname);
554 exit(1);
555 }
556
557 asm_file(F, w, CORESIZE);
558
559 if ( !is_stdin ) fclose(F);
560 }
561
562
563
564 /* NAME
565 * dis1 -- disasemble an instruction
566 * discore -- disasemble a segment of core
567 *
568 * SYNOPSIS
569 * void dis1( char *s, inst_t in, unsigned int CORESIZE );
570 * void discore( inst_t *core, unsigned int start, unsigned int end,
571 * unsigned int CORESIZE );
572 *
573 * INPUTS
574 * s -- string to print disassembled instruction to. A string
575 * of length 60 should be more than sufficient.
576 * in -- instruction to disassemble
577 * core -- pointer to start of core
578 * start -- core segment start offset
579 * end -- core segment end offset (excluded)
580 *
581 * RESULTS
582 * dis1 -- The disassembled instruction is printed to `s'.
583 * discore -- A segment of core is dissasembled and printed
584 * to stdout with core addresses.
585 */
586
587 void
dis1(char * buf,insn_t in,unsigned int CORESIZE)588 dis1(char *buf, insn_t in, unsigned int CORESIZE)
589 {
590 int x;
591 char *op_s, *mo_s, *ma_s, *mb_s;
592 int af, bf;
593
594 x = (in.in >> opPOS) & opMASK;
595 switch( x ) {
596 case DAT: op_s = "dat"; break;
597 case SPL: op_s = "spl"; break;
598 case MOV: op_s = "mov"; break;
599 case JMP: op_s = "jmp"; break;
600 case JMZ: op_s = "jmz"; break;
601 case JMN: op_s = "jmn"; break;
602 case ADD: op_s = "add"; break;
603 case SUB: op_s = "sub"; break;
604 case SEQ: op_s = "seq"; break;
605 case SNE: op_s = "sne"; break;
606 case MUL: op_s = "mul"; break;
607 case DIV: op_s = "div"; break;
608 case DJN: op_s = "djn"; break;
609 case SLT: op_s = "slt"; break;
610 case MODM: op_s = "mod"; break;
611 case NOP: op_s = "nop"; break;
612 case LDP: op_s = "ldp"; break;
613 case STP: op_s = "stp"; break;
614 default:
615 op_s = "???";
616 }
617
618 x = (in.in >> moPOS) & moMASK;
619 switch ( x ) {
620 case mF: mo_s = "f "; break;
621 case mA: mo_s = "a "; break;
622 case mB: mo_s = "b "; break;
623 case mAB: mo_s = "ab"; break;
624 case mBA: mo_s = "ba"; break;
625 case mX: mo_s = "x "; break;
626 case mI: mo_s = "i "; break;
627 default:
628 mo_s = "?";
629 }
630
631
632 x = (in.in >> maPOS) & mMASK;
633 switch (x) {
634 case DIRECT: ma_s = "$"; break;
635 case IMMEDIATE: ma_s = "#"; break;
636 case AINDIRECT: ma_s = "*"; break;
637 case BINDIRECT: ma_s = "@"; break;
638 case APREDEC: ma_s = "{"; break;
639 case APOSTINC: ma_s = "}"; break;
640 case BPREDEC: ma_s = "<"; break;
641 case BPOSTINC: ma_s = ">"; break;
642 default: ma_s = "?";
643 }
644
645 x = (in.in >> mbPOS) & mMASK;
646 switch (x) {
647 case DIRECT: mb_s = "$"; break;
648 case IMMEDIATE: mb_s = "#"; break;
649 case AINDIRECT: mb_s = "*"; break;
650 case BINDIRECT: mb_s = "@"; break;
651 case APREDEC: mb_s = "{"; break;
652 case APOSTINC: mb_s = "}"; break;
653 case BPREDEC: mb_s = "<"; break;
654 case BPOSTINC: mb_s = ">"; break;
655 default: mb_s = "?";
656 }
657
658 af = in.a <= CORESIZE/2 ? in.a : in.a - CORESIZE;
659 bf = in.b <= CORESIZE/2 ? in.b : in.b - CORESIZE;
660
661 sprintf(buf,"%s.%s %s%5d , %s%5d", op_s, mo_s, ma_s, af, mb_s, bf);
662 }
663
664
665 void
discore(const insn_t * core,int start,int end,unsigned int CORESIZE)666 discore( const insn_t *core,
667 int start,
668 int end,
669 unsigned int CORESIZE )
670 {
671 int adr;
672 char line[MAX_ALL_CHARS];
673 for ( adr = start; adr < end; adr++ ) {
674 int i = MODS(adr, CORESIZE);
675 dis1( line, core[i], CORESIZE );
676 printf("%4d %s\n", adr, line);
677 }
678 }
679