1 /* asm.c: primitive redcode assembler
2 * $Id: asm.c,v 1.8 2002/10/01 22:24:46 rowan Exp $
3 */
4
5 /* This file is part of `exhaust', a memory array redcode simulator.
6 * Author: M Joonas Pihlaja
7 * Public Domain.
8 */
9
10 /* The format of lines with instructions should be:
11 *
12 * [START] OPCODE.MODIFIER A-MODE INT , B-MODE INT
13 *
14 * The ORG pseudo-op is ignored, as is the label after an optional
15 * END (if given). The only label recognised is START. No fuss over
16 * the amount of white space, as long as it exists where required.
17 *
18 * Comments are recognised and discarded as is any line starting
19 * with "Program". The output from `pmars -r 0 Your_Real_Source.red'
20 * should assemble fine with this tiny assembler.
21 *
22 *
23 * Functions in this file:
24 *
25 * asm_line(), asm_file(), asm_fname(), dis1(),
26 * discore()
27 */
28 #include <stdio.h>
29 #ifdef SYSV
30 #include <strings.h>
31 #else
32 #include <string.h>
33 #endif
34 #include <stdlib.h>
35 #include <ctype.h>
36
37 #include "exhaust.h"
38 #include "insn.h"
39 #include "asm.h"
40
/* str_toks_t: container for tokens we identify.
 *
 * Pairs the spelling of a multi-character token with its token code.
 * The spelling is only ever read (it is compared with strcmp()), so it
 * is const-qualified; this also makes it legal to initialise it from
 * string literals without discarding qualifiers.
 */
typedef struct str_toks_st {
  const char *s;		/* name of token */
  int c;			/* token code */
} str_toks_t;
47
48 /* Data
49 *
50 * tok_buf[]: globally used to keep the contents of string tokens
51 * tok_int: if the token was a TOK_INT, the value of the token is here
52 *
53 * str_toks[]: table of multicharacter tokens we identify
54 *
55 */
56
57 #define MAX_ALL_CHARS 256
58 static char tok_buf[MAX_ALL_CHARS];
59 static int tok_int;
60
61 static str_toks_t str_toks[] = {
62 { "DAT", TOK_DAT }, /* opcodes */
63 { "SPL", TOK_SPL },
64 { "MOV", TOK_MOV },
65 { "DJN", TOK_DJN },
66 { "ADD", TOK_ADD },
67 { "JMZ", TOK_JMZ },
68 { "SUB", TOK_SUB },
69 { "MOD", TOK_MOD },
70 { "CMP", TOK_SEQ },
71 { "SEQ", TOK_SEQ },
72 { "JMP", TOK_JMP },
73 { "JMN", TOK_JMN },
74 { "SNE", TOK_SNE },
75 { "MUL", TOK_MUL },
76 { "DIV", TOK_DIV },
77 { "SLT", TOK_SLT },
78 { "NOP", TOK_NOP },
79 { "LDP", TOK_LDP },
80 { "STP", TOK_STP },
81
82 { "ORG", TOK_ORG }, /* pseudo-ops */
83 { "END", TOK_END },
84 { "PIN", TOK_PIN },
85 { "START", TOK_START },
86
87 { "F", TOK_mF }, /* modifiers */
88 { "A", TOK_mA },
89 { "B", TOK_mB },
90 { "AB", TOK_mAB },
91 { "BA", TOK_mBA },
92 { "X", TOK_mX },
93 { "I", TOK_mI },
94 { NULL, 0 } /* sentinel */
95 };
96
97
98
99 /* NAME
100 * get_tok -- read the next token from a string
101 *
102 * SYNOPSIS
103 * const char *get_tok( const char *s, int *tok );
104 *
105 * INPUTS
106 * s -- string to read token from
107 * tok -- where we store the token code of the read token
108 *
109 * RESULTS
110 * The token code of the read token is stored into *tok,
111 * with 0 signifying end of input.
112 *
 *    If the token was an integer, its value is stored into
 *    the global `tok_int'.  Integers may be decimal, octal
 *    (leading `0') or hexadecimal (leading `0x'), as parsed
 *    by strtol() with base 0.
 *
 *    String tokens are converted to upper case when storing
 *    them into the global `tok_buf[]'.  They are truncated
 *    at 255 characters.
120 *
121 * RETURN VALUE
122 * Pointer to the character past the read token, or
123 * to the nul character if at end of input.
124 *
125 * GLOBALS
126 * tok_buf[] -- a string or char token is copied here
127 * tok_int -- the value of an integer token
128 * str_toks[] -- used to identify string tokens
129 */
130
/* skip_white(): returns ptr. to the first non-whitespace char in s.
 * If s holds only whitespace (or is empty) the result points at the
 * terminating nul character.
 */
static
const char *
skip_white(const char *s)
{
  /* The <ctype.h> classifiers are only defined for EOF and values
   * representable as unsigned char; a plain char may be negative for
   * bytes >= 0x80, so convert before classifying. */
  while ( isspace((unsigned char)*s) )
    s++;
  return s;
}
139
140 static
141 const char *
get_tok(const char * s,int * tok)142 get_tok( const char *s, int *tok )
143 {
144 char *tok_str = tok_buf;
145 int i;
146
147 s = skip_white(s);
148 if ( *s == 0 ) return (*tok = 0, s);
149
150 /*
151 * Tokenize strings.
152 *
153 * String tokens must start with a letter and consist of
154 * letters, digits, and underscores. Strings are
155 * converted to upper case.
156 */
157 tok_buf[1] = tok_buf[0] = 0;
158
159 i = 0;
160 if ( isalpha(*s) )
161 while ( (isalnum(*s) || *s == '_') && ++i < MAX_ALL_CHARS )
162 *tok_str++ = toupper(*s++);
163 *tok_str = 0;
164
165 if ( tok_str > tok_buf ) {
166 /*
167 * was a string token -- identify it by searching through
168 * the str_toks[] array.
169 */
170 for ( i = 0; str_toks[i].s ; i++ ) {
171 if ( 0 == strcmp( str_toks[i].s, tok_buf ) ) {
172 *tok = str_toks[i].c;
173 return s;
174 }
175 }
176 *tok = TOK_STR; /* normal string, not special */
177 return s;
178 }
179
180
181 /*
182 * Tokenize ints.
183 * Must match /-?[0-9]/
184 */
185 if ( isdigit(*s) || ( *s == '-' && isdigit(*(s+1)) )) {
186 char *endptr;
187 tok_int = strtol( s, &endptr, 0 );
188 *tok = TOK_INT;
189 return endptr;
190 }
191
192
193 /*
194 * Tokenize addressing modes and pass single chars
195 */
196
197 tok_buf[0] = *s; /* store char value as single */
198 tok_buf[1] = 0; /* char string. */
199
200 switch ( *tok = *s++ ) {
201 case '$': *tok = TOK_DIRECT; break;
202 case '#': *tok = TOK_IMMEDIATE; break;
203 case '*': *tok = TOK_AINDIRECT; break;
204 case '@': *tok = TOK_BINDIRECT; break;
205 case '{': *tok = TOK_APREDEC; break;
206 case '<': *tok = TOK_BPREDEC; break;
207 case '}': *tok = TOK_APOSTINC; break;
208 case '>': *tok = TOK_BPOSTINC; break;
209 }
210
211 return s;
212 }
213
214
215
216 /* NAME
217 * panic_bad_token -- issue an error message for a bad token and exit(1)
218 *
219 * SYNOPSIS
220 * void panic_bad_token( int tok, const char *expected );
221 *
222 * INPUTS
223 * tok -- token code of unexpected token
224 * expected -- a string describing what kind of token
225 * was expected. e.g. "a modifier".
226 *
227 * RESULTS
228 * A message Informing the user of the unexpected token,
229 * its possible semantic value, and what type of token
230 * was expected instead.
231 *
232 * GLOBALS
233 * tok_buf, tok_int -- if the token has semantic value we look
234 * for it here.
235 * BUGS
236 * The error message should be much better -- not even location
237 * in the source is given here. *sigh*
238 */
239 static
240 void
panic_bad_token(int tok,const char * expected)241 panic_bad_token( int tok, const char *expected )
242 {
243 char *errstr = NULL;
244 char buf[30];
245
246 memset(buf, 0, 30);
247
248 /* make an errstr
249 */
250 if ( tok_buf[0] )
251 errstr = tok_buf;
252 if ( tok == TOK_INT ) {
253 sprintf(buf, "%d", tok_int );
254 errstr = buf;
255 }
256
257 /* complain and exit with error code
258 */
259 fprintf(stderr, "token '%s' not %s\n", errstr, expected );
260 exit(1);
261 }
262
263
264 /* NAME
265 * asm_line -- assemble a line to an instruction
266 *
267 * SYNOPSIS
268 * int asm_line( const char *line, insn_t *in, unsigned int CORESIZE );
269 *
270 * INPUTS
271 * line -- line to assemble
272 * in -- instruction to assemble into
273 * CORESIZE -- size of core
274 *
275 * RESULTS
276 * If there was anything to assemble, it is assembled into
277 * `in'. If there was a START label, the corresponding flag
278 * is set in the instructions flags. Incomplete or erroneous
279 * input prompt a quick error message and exit(1).
280 *
281 * If the 'ORG start-address' construct is encountered where
282 * `start-address' is an integer, then the `in->a' field contains
283 * the offset in instructions from the start of the warrior
284 * where the warrior should start execution.
285 *
286 * If 'PIN id' is encountered, where `id' is an integer, then the
287 * `in->a' field contains the `id'.
288 *
289 * RETURN VALUE
290 * ASMLINE_PIN : pseudo-op 'PIN' encountered, id saved in `in->a'.
291 * ASMLINE_ORG : pseudo-op 'ORG' encountered, warrior start
292 * saved in `in->a'.
293 * ASMLINE_DONE : done assembling, END opcode found, nothing assembled.
294 * ASMLINE_NONE : nothing to assemble on this line.
295 * ASMLINE_OK : assembled instruction into `in' OK.
296 *
297 * GLOBALS
298 * tok_int, tok_buf[], str_toks[] somewhere down the line.
299 */
300
301 int
asm_line(const char * line,insn_t * in,unsigned int CORESIZE)302 asm_line( const char *line, insn_t *in, unsigned int CORESIZE )
303 {
304 const char *s = line;
305 int tok;
306 int flags = 0;
307 int op, m, ma, mb; /* opcode, modifier, a-mode, b-mode */
308
309 s = get_tok( s, &tok );
310 if ( tok == 0 ) return ASMLINE_NONE;
311
312 /*
313 * Ignore string lines '^Program.*' and comments.
314 */
315 if ( tok == TOK_STR && 0 == strcmp( "PROGRAM", tok_buf ))
316 {
317 return ASMLINE_NONE;
318 }
319 if ( tok == ';' ) return ASMLINE_NONE;
320
321 /*
322 * Now match the instruction's various components:
323 * [START label,] opcode, modifier, a-mode, a-value, b-mode, b-value
324 */
325
326 /* Match possible start label
327 */
328 if ( tok == TOK_START ) {
329 flags |= fl_START;
330 s = get_tok( s, &tok );
331 }
332
333 /* Match opcode
334 */
335 if ( is_tok_pseudoop(tok) ) {
336 switch ( tok ) {
337 case TOK_END:
338 return ASMLINE_DONE; /* signal done assembling */
339
340 case TOK_ORG:
341 s = get_tok( s, &tok ); /* get the next token */
342
343 if ( tok == TOK_START ) /* ignore: */
344 return ASMLINE_NONE; /* start label already matched and processed */
345
346 if ( tok != TOK_INT ) {
347 panic_bad_token( tok, "an integer -- an int or \"START\" "
348 "follows ORG" );
349 }
350 in->a = tok_int;
351 return ASMLINE_ORG;
352
353 case TOK_PIN:
354 s = get_tok( s, &tok );
355 if ( tok != TOK_INT ) {
356 panic_bad_token( tok, "an integer -- PIN must be an unsigned integer");
357 }
358 in->a = tok_int;
359 return ASMLINE_PIN;
360
361 default:
362 panic_bad_token( tok, "a pseudo-op (internal assembler error)" );
363 }
364 }
365 if (!( is_tok_opcode(tok)))
366 panic_bad_token( tok, "an opcode" );
367
368 op = DAT;
369 switch(tok) {
370 case TOK_DAT: op = DAT; break;
371 case TOK_SPL: op = SPL; break;
372 case TOK_MOV: op = MOV; break;
373 case TOK_JMP: op = JMP; break;
374 case TOK_JMZ: op = JMZ; break;
375 case TOK_JMN: op = JMN; break;
376 case TOK_ADD: op = ADD; break;
377 case TOK_SUB: op = SUB; break;
378 case TOK_SEQ: op = SEQ; break;
379 case TOK_SNE: op = SNE; break;
380 case TOK_MUL: op = MUL; break;
381 case TOK_DIV: op = DIV; break;
382 case TOK_DJN: op = DJN; break;
383 case TOK_SLT: op = SLT; break;
384 case TOK_MOD: op = MODM; break;
385 case TOK_NOP: op = NOP; break;
386 case TOK_LDP: op = LDP; break;
387 case TOK_STP: op = STP; break;
388 default:
389 panic_bad_token( tok, "an opcode" );
390 }
391
392 /* Match modifier
393 */
394 s = get_tok( s, &tok ); /* first the '.' */
395 if ( tok != '.' )
396 panic_bad_token( tok, "'.'" );
397
398 s = get_tok( s, &tok ); /* then the modifier itself */
399 if ( ! is_tok_modifier(tok) )
400 panic_bad_token( tok, "a modifier");
401 m = tok - TOK_mF;
402
403 /* Match a-field addressing mode and a-field
404 */
405 s = get_tok( s, &tok );
406 if ( ! is_tok_mode(tok) )
407 panic_bad_token( tok, "an addressing mode specifier");
408 ma = tok - TOK_DIRECT;
409
410 s = get_tok( s, &tok );
411 if ( tok != TOK_INT )
412 panic_bad_token( tok, "an integer");
413 in->a = MODS(tok_int,CORESIZE);
414
415 /* Match comma
416 */
417 s = get_tok( s, &tok );
418 if ( tok != ',' )
419 panic_bad_token( tok, "','" );
420
421 /* Match b-field addressing mode and a-field
422 */
423 s = get_tok( s, &tok );
424 if ( ! is_tok_mode(tok) )
425 panic_bad_token( tok, "an addressing mode specifier");
426 mb = tok - TOK_DIRECT;
427
428 s = get_tok( s, &tok );
429 if ( tok != TOK_INT )
430 panic_bad_token( tok, "an integer");
431 in->b = MODS(tok_int,CORESIZE);
432
433
434 /*
435 * Set flags and ignore the rest of the line
436 */
437 in->in = (flags << flPOS) | OP( op, m, ma, mb );
438 return ASMLINE_OK;
439 }
440
441
442
443
444 /* NAME
445 * asm_file, asm_fname -- assemble a FILE into a warrior
446 *
447 * SYNOPSIS
448 * void asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE );
449 * void asm_fname( const char *filename, warrior_t *w,
450 * unsigned int CORESIZE );
451 *
452 * INPUTS
453 * w -- warrior_t to assemble into.
454 * F -- stream to read warrior source from
455 * filename -- path to source file. May be '-'
456 * which is interpreted as stdin.
457 * CORESIZE -- just that
458 *
459 * DESCRIPTION
460 * These functions assemble a source file into a
461 * warrior_t setting all the non-info fields.
462 *
 * RESULTS
 *     If the warrior assembled correctly, then warrior_t
 *     contains its code and starting offset.  If an error
 *     occurred during assembly, an error message is issued
 *     and the program exit()s.
468 *
469 * GLOBALS
470 * none as such, subroutines use tok_buf[], tok_int, str_toks[],
471 * MAXLENGTH constant
472 *
473 * SEE ALSO
474 * asm_line()
475 *
 * BUGS
 *     It's not really acceptable to exit() on an assembly error.
478 */
479 void
asm_file(FILE * F,warrior_t * w,unsigned int CORESIZE)480 asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE )
481 {
482 char line[MAX_ALL_CHARS];
483 insn_t *c;
484 int ret; /* return code from asm_line() */
485
486 w->len = w->start = 0;
487 w->have_pin = 0;
488 w->pin = 0;
489 c = w->code;
490
491 while ( fgets(line, MAX_ALL_CHARS, F) ) {
492 ret = asm_line( line, c, CORESIZE );
493 if ( ret == ASMLINE_DONE ) break;
494
495 switch ( ret ) {
496 case ASMLINE_OK:
497 if ( get_flags( c->in ) & fl_START ) {
498 w->start = w->len;
499 clr_flags( c->in, fl_START );
500 }
501 if ( w->len < MAXLENGTH) c++;
502 w->len++;
503 break;
504
505 case ASMLINE_ORG:
506 w->start = c->a; /* was `ORG int', get the starting address */
507 break;
508
509 case ASMLINE_NONE:
510 break; /* nop */
511
512 case ASMLINE_PIN:
513 w->have_pin = 1;
514 w->pin = c->a; /* save PIN. */
515 break;
516
517 default:
518 fprintf(stderr,"asm.c/asm_file(): illegal return code from asm_line()\n");
519 exit(1);
520 }
521 if ( w->len > MAXLENGTH ) {
522 fprintf(stderr, "too many instructions in warrior %d\n", w->no);
523 exit(1);
524 }
525 }
526 if ( w->start >= w->len ) {
527 fprintf(stderr, "starting address must be inside warrior body\n" );
528 exit(1);
529 }
530 }
531
532
533 void
asm_fname(const char * fname,warrior_t * w,unsigned int CORESIZE)534 asm_fname( const char *fname, warrior_t *w, unsigned int CORESIZE )
535 {
536 FILE *F;
537 int is_stdin = 0;
538
539 if ( strcmp( fname, "-" ) == 0 ) {
540 F = stdin;
541 is_stdin = 1;
542 }
543 else
544 if (!( F = fopen(fname, "r") )) {
545 fprintf(stderr, "can't open file %s\n", fname);
546 exit(1);
547 }
548
549 asm_file(F, w, CORESIZE);
550
551 if ( !is_stdin ) fclose(F);
552 }
553
554
555
556 /* NAME
 *     dis1 -- disassemble an instruction
 *     discore -- disassemble a segment of core
559 *
560 * SYNOPSIS
 *     void dis1( char *s, insn_t in, unsigned int CORESIZE );
 *     void discore( const insn_t *core, int start, int end,
 *                   unsigned int CORESIZE );
564 *
565 * INPUTS
566 * s -- string to print disassembled instruction to. A string
567 * of length 60 should be more than sufficient.
568 * in -- instruction to disassemble
569 * core -- pointer to start of core
570 * start -- core segment start offset
571 * end -- core segment end offset (excluded)
572 *
573 * RESULTS
 *     dis1 -- The disassembled instruction is printed to `s'.
 *     discore -- A segment of core is disassembled and printed
 *                to stdout with core addresses.
577 */
578
579 void
dis1(char * buf,insn_t in,unsigned int CORESIZE)580 dis1(char *buf, insn_t in, unsigned int CORESIZE)
581 {
582 int x;
583 char *op_s, *mo_s, *ma_s, *mb_s;
584 int af, bf;
585
586 x = (in.in >> opPOS) & opMASK;
587 switch( x ) {
588 case DAT: op_s = "dat"; break;
589 case SPL: op_s = "spl"; break;
590 case MOV: op_s = "mov"; break;
591 case JMP: op_s = "jmp"; break;
592 case JMZ: op_s = "jmz"; break;
593 case JMN: op_s = "jmn"; break;
594 case ADD: op_s = "add"; break;
595 case SUB: op_s = "sub"; break;
596 case SEQ: op_s = "seq"; break;
597 case SNE: op_s = "sne"; break;
598 case MUL: op_s = "mul"; break;
599 case DIV: op_s = "div"; break;
600 case DJN: op_s = "djn"; break;
601 case SLT: op_s = "slt"; break;
602 case MODM: op_s = "mod"; break;
603 case NOP: op_s = "nop"; break;
604 case LDP: op_s = "ldp"; break;
605 case STP: op_s = "stp"; break;
606 default:
607 op_s = "???";
608 }
609
610 x = (in.in >> moPOS) & moMASK;
611 switch ( x ) {
612 case mF: mo_s = "f "; break;
613 case mA: mo_s = "a "; break;
614 case mB: mo_s = "b "; break;
615 case mAB: mo_s = "ab"; break;
616 case mBA: mo_s = "ba"; break;
617 case mX: mo_s = "x "; break;
618 case mI: mo_s = "i "; break;
619 default:
620 mo_s = "?";
621 }
622
623
624 x = (in.in >> maPOS) & mMASK;
625 switch (x) {
626 case DIRECT: ma_s = "$"; break;
627 case IMMEDIATE: ma_s = "#"; break;
628 case AINDIRECT: ma_s = "*"; break;
629 case BINDIRECT: ma_s = "@"; break;
630 case APREDEC: ma_s = "{"; break;
631 case APOSTINC: ma_s = "}"; break;
632 case BPREDEC: ma_s = "<"; break;
633 case BPOSTINC: ma_s = ">"; break;
634 default: ma_s = "?";
635 }
636
637 x = (in.in >> mbPOS) & mMASK;
638 switch (x) {
639 case DIRECT: mb_s = "$"; break;
640 case IMMEDIATE: mb_s = "#"; break;
641 case AINDIRECT: mb_s = "*"; break;
642 case BINDIRECT: mb_s = "@"; break;
643 case APREDEC: mb_s = "{"; break;
644 case APOSTINC: mb_s = "}"; break;
645 case BPREDEC: mb_s = "<"; break;
646 case BPOSTINC: mb_s = ">"; break;
647 default: mb_s = "?";
648 }
649
650 af = in.a <= CORESIZE/2 ? in.a : in.a - CORESIZE;
651 bf = in.b <= CORESIZE/2 ? in.b : in.b - CORESIZE;
652
653 sprintf(buf,"%s.%s %s%5d , %s%5d", op_s, mo_s, ma_s, af, mb_s, bf);
654 }
655
656
657 void
discore(const insn_t * core,int start,int end,unsigned int CORESIZE)658 discore( const insn_t *core,
659 int start,
660 int end,
661 unsigned int CORESIZE )
662 {
663 int adr;
664 char line[MAX_ALL_CHARS];
665 for ( adr = start; adr < end; adr++ ) {
666 int i = MODS(adr, CORESIZE);
667 dis1( line, core[i], CORESIZE );
668 printf("%4d %s\n", adr, line);
669 }
670 }
671