1 /* asm.c: primitive redcode assembler
2 * $Id: asm.c,v 1.5 2003/08/30 16:18:30 varfar Exp $
3 */
4
5 /* This file is part of `exhaust', a memory array redcode simulator.
6 * Author: M Joonas Pihlaja
7 * Public Domain.
8 */
9
10 /* The format of lines with instructions should be:
11 *
12 * [START] OPCODE.MODIFIER A-MODE INT , B-MODE INT
13 *
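 * `ORG START' is ignored (the START label itself marks the entry
 * point), while `ORG int' sets the start offset and `PIN int' sets the
 * warrior's PIN. Anything following an optional END is ignored. The
 * only label recognised is START. No fuss over the amount of white
 * space, as long as it exists where required.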
17 *
18 * Comments are recognised and discarded as is any line starting
19 * with "Program". The output from `pmars -r 0 Your_Real_Source.red'
20 * should assemble fine with this tiny assembler.
21 *
22 *
23 * Functions in this file:
24 *
25 * asm_line(), asm_file(), asm_fname(), dis1(),
26 * discore()
27 */
28 #include <stdio.h>
29 #ifdef SYSV
30 #include <strings.h>
31 #else
32 #include <string.h>
33 #endif
34 #include <stdlib.h>
35 #include <ctype.h>
36
37 #include "exhaust.h"
38 #include "insn.h"
39 #include "asm.h"
40
/* str_toks_t: container for tokens we identify.
 *
 * Pairs the upper-case spelling of a multi-character token with its
 * token code.  The name is only ever read, and every entry is
 * initialised from a string literal, so it is a pointer to const.
 */
typedef struct str_toks_st {
  const char *s;		/* name of token */
  int c;			/* token code */
} str_toks_t;
47
48 /* Data
49 *
50 * tok_buf[]: globally used to keep the contents of string tokens
51 * tok_int: if the token was a TOK_INT, the value of the token is here
52 *
53 * str_toks[]: table of multicharacter tokens we identify
54 *
55 */
56
/* Size in bytes (terminating nul included) of tok_buf[] and of the
 * line buffers declared in this file.
 */
#define MAX_ALL_CHARS 256

/* Text of the most recently scanned string or single-character token. */
static char tok_buf[MAX_ALL_CHARS];

/* Value of the most recently scanned integer token. */
static int tok_int;
60
61 static str_toks_t str_toks[] = {
62 { "DAT", TOK_DAT }, /* opcodes */
63 { "SPL", TOK_SPL },
64 { "MOV", TOK_MOV },
65 { "DJN", TOK_DJN },
66 { "ADD", TOK_ADD },
67 { "JMZ", TOK_JMZ },
68 { "SUB", TOK_SUB },
69 { "MOD", TOK_MOD },
70 { "CMP", TOK_SEQ },
71 { "SEQ", TOK_SEQ },
72 { "JMP", TOK_JMP },
73 { "JMN", TOK_JMN },
74 { "SNE", TOK_SNE },
75 { "MUL", TOK_MUL },
76 { "DIV", TOK_DIV },
77 { "SLT", TOK_SLT },
78 { "NOP", TOK_NOP },
79 { "LDP", TOK_LDP },
80 { "STP", TOK_STP },
81
82 { "ORG", TOK_ORG }, /* pseudo-ops */
83 { "END", TOK_END },
84 { "PIN", TOK_PIN },
85 { "START", TOK_START },
86
87 { "F", TOK_mF }, /* modifiers */
88 { "A", TOK_mA },
89 { "B", TOK_mB },
90 { "AB", TOK_mAB },
91 { "BA", TOK_mBA },
92 { "X", TOK_mX },
93 { "I", TOK_mI },
94 { NULL, 0 } /* sentinel */
95 };
96
97 /* NAME
98 * get_tok -- read the next token from a string
99 *
100 * SYNOPSIS
101 * const char *get_tok( const char *s, int *tok );
102 *
103 * INPUTS
104 * s -- string to read token from
105 * tok -- where we store the token code of the read token
106 *
107 * RESULTS
108 * The token code of the read token is stored into *tok,
109 * with 0 signifying end of input.
110 *
 * If the token was an integer, its value is stored into
 * the global `tok_int'. Integers may be decimal, octal
 * (leading 0) or hexadecimal (leading 0x), as accepted by
 * strtol() with base 0.
 *
 * String tokens are converted to upper case when storing
 * them into the global `tok_buf[]'. They are truncated
 * at 255 characters.
118 *
119 * RETURN VALUE
120 * Pointer to the character past the read token, or
121 * to the nul character if at end of input.
122 *
123 * GLOBALS
124 * tok_buf[] -- a string or char token is copied here
125 * tok_int -- the value of an integer token
126 * str_toks[] -- used to identify string tokens
127 */
128
/* skip_white(): returns ptr. to next non-whitespace char in s
 * (possibly the terminating nul).
 *
 * The character is converted to unsigned char before it is handed to
 * isspace(): passing a negative plain-char value to a <ctype.h>
 * function is undefined behaviour.
 */
static
const char *
skip_white(const char *s)
{
  while ( isspace( (unsigned char)*s ) ) s++;
  return s;
}
137
138 static
139 const char *
get_tok(const char * s,int * tok)140 get_tok( const char *s, int *tok )
141 {
142 char *tok_str = tok_buf;
143 int i;
144
145 s = skip_white(s);
146 if ( *s == 0 ) return (*tok = 0, s);
147
148 /*
149 * Tokenize strings.
150 *
151 * String tokens must start with a letter and consist of
152 * letters, digits, and underscores. Strings are
153 * converted to upper case.
154 */
155 tok_buf[1] = tok_buf[0] = 0;
156
157 i = 0;
158 if ( isalpha(*s) )
159 while ( (isalnum(*s) || *s == '_') && ++i < MAX_ALL_CHARS )
160 *tok_str++ = toupper(*s++);
161 *tok_str = 0;
162
163 if ( tok_str > tok_buf ) {
164 /*
165 * was a string token -- identify it by searching through
166 * the str_toks[] array.
167 */
168 for ( i = 0; str_toks[i].s ; i++ ) {
169 if ( 0 == strcmp( str_toks[i].s, tok_buf ) ) {
170 *tok = str_toks[i].c;
171 return s;
172 }
173 }
174 *tok = TOK_STR; /* normal string, not special */
175 return s;
176 }
177
178
179 /*
180 * Tokenize ints.
181 * Must match /-?[0-9]/
182 */
183 if ( isdigit(*s) || ( *s == '-' && isdigit(*(s+1)) )) {
184 char *endptr;
185 tok_int = strtol( s, &endptr, 0 );
186 *tok = TOK_INT;
187 return endptr;
188 }
189
190
191 /*
192 * Tokenize addressing modes and pass single chars
193 */
194
195 tok_buf[0] = *s; /* store char value as single */
196 tok_buf[1] = 0; /* char string. */
197
198 switch ( *tok = *s++ ) {
199 case '$': *tok = TOK_DIRECT; break;
200 case '#': *tok = TOK_IMMEDIATE; break;
201 case '*': *tok = TOK_AINDIRECT; break;
202 case '@': *tok = TOK_BINDIRECT; break;
203 case '{': *tok = TOK_APREDEC; break;
204 case '<': *tok = TOK_BPREDEC; break;
205 case '}': *tok = TOK_APOSTINC; break;
206 case '>': *tok = TOK_BPOSTINC; break;
207 }
208
209 return s;
210 }
211
212
213
214 /* NAME
215 * panic_bad_token -- issue an error message for a bad token and exit(1)
216 *
217 * SYNOPSIS
218 * void panic_bad_token( int tok, const char *expected );
219 *
220 * INPUTS
221 * tok -- token code of unexpected token
222 * expected -- a string describing what kind of token
223 * was expected. e.g. "a modifier".
224 *
225 * RESULTS
226 * A message Informing the user of the unexpected token,
227 * its possible semantic value, and what type of token
228 * was expected instead.
229 *
230 * GLOBALS
231 * tok_buf, tok_int -- if the token has semantic value we look
232 * for it here.
233 * BUGS
234 * The error message should be much better -- not even location
235 * in the source is given here. *sigh*
236 */
237 static
238 void
panic_bad_token(int tok,const char * expected)239 panic_bad_token( int tok, const char *expected )
240 {
241 char *errstr = NULL;
242 char buf[30];
243
244 memset(buf, 0, 30);
245
246 /* make an errstr
247 */
248 if ( tok_buf[0] )
249 errstr = tok_buf;
250 if ( tok == TOK_INT ) {
251 sprintf(buf, "%d", tok_int );
252 errstr = buf;
253 }
254
255 /* complain and exit with error code
256 */
257 fprintf(stderr, "token '%s' not %s\n", errstr, expected );
258 exit(1);
259 }
260
261
262 /* NAME
263 * asm_line -- assemble a line to an instruction
264 *
265 * SYNOPSIS
266 * int asm_line( const char *line, insn_t *in, unsigned int CORESIZE );
267 *
268 * INPUTS
269 * line -- line to assemble
270 * in -- instruction to assemble into
271 * CORESIZE -- size of core
272 *
273 * RESULTS
274 * If there was anything to assemble, it is assembled into
275 * `in'. If there was a START label, the corresponding flag
276 * is set in the instructions flags. Incomplete or erroneous
277 * input prompt a quick error message and exit(1).
278 *
279 * If the 'ORG start-address' construct is encountered where
280 * `start-address' is an integer, then the `in->a' field contains
281 * the offset in instructions from the start of the warrior
282 * where the warrior should start execution.
283 *
284 * If 'PIN id' is encountered, where `id' is an integer, then the
285 * `in->a' field contains the `id'.
286 *
287 * RETURN VALUE
288 * ASMLINE_PIN : pseudo-op 'PIN' encountered, id saved in `in->a'.
289 * ASMLINE_ORG : pseudo-op 'ORG' encountered, warrior start
290 * saved in `in->a'.
291 * ASMLINE_DONE : done assembling, END opcode found, nothing assembled.
292 * ASMLINE_NONE : nothing to assemble on this line.
293 * ASMLINE_OK : assembled instruction into `in' OK.
294 *
295 * GLOBALS
296 * tok_int, tok_buf[], str_toks[] somewhere down the line.
297 */
298
299 int
asm_line(const char * line,insn_t * in,unsigned int CORESIZE)300 asm_line( const char *line, insn_t *in, unsigned int CORESIZE )
301 {
302 const char *s = line;
303 int tok;
304 int flags = 0;
305 int op, m, ma, mb; /* opcode, modifier, a-mode, b-mode */
306
307 s = get_tok( s, &tok );
308 if ( tok == 0 ) return ASMLINE_NONE;
309
310 /*
311 * Ignore string lines '^Program.*' and comments.
312 */
313 if ( tok == TOK_STR && 0 == strcmp( "PROGRAM", tok_buf ))
314 {
315 return ASMLINE_NONE;
316 }
317 if ( tok == ';' ) return ASMLINE_NONE;
318
319 /*
320 * Now match the instruction's various components:
321 * [START label,] opcode, modifier, a-mode, a-value, b-mode, b-value
322 */
323
324 /* Match possible start label
325 */
326 if ( tok == TOK_START ) {
327 flags |= fl_START;
328 s = get_tok( s, &tok );
329 }
330
331 /* Match opcode
332 */
333 if ( is_tok_pseudoop(tok) ) {
334 switch ( tok ) {
335 case TOK_END:
336 return ASMLINE_DONE; /* signal done assembling */
337
338 case TOK_ORG:
339 s = get_tok( s, &tok ); /* get the next token */
340
341 if ( tok == TOK_START ) /* ignore: */
342 return ASMLINE_NONE; /* start label already matched and processed */
343
344 if ( tok != TOK_INT ) {
345 panic_bad_token( tok, "an integer -- an int or \"START\" "
346 "follows ORG" );
347 }
348 in->a = tok_int;
349 return ASMLINE_ORG;
350
351 case TOK_PIN:
352 s = get_tok( s, &tok );
353 if ( tok != TOK_INT ) {
354 panic_bad_token( tok, "an integer -- PIN must be an unsigned integer");
355 }
356 in->a = tok_int;
357 return ASMLINE_PIN;
358
359 default:
360 panic_bad_token( tok, "a pseudo-op (internal assembler error)" );
361 }
362 }
363 if (!( is_tok_opcode(tok)))
364 panic_bad_token( tok, "an opcode" );
365
366 op = DAT;
367 switch(tok) {
368 case TOK_DAT: op = DAT; break;
369 case TOK_SPL: op = SPL; break;
370 case TOK_MOV: op = MOV; break;
371 case TOK_JMP: op = JMP; break;
372 case TOK_JMZ: op = JMZ; break;
373 case TOK_JMN: op = JMN; break;
374 case TOK_ADD: op = ADD; break;
375 case TOK_SUB: op = SUB; break;
376 case TOK_SEQ: op = SEQ; break;
377 case TOK_SNE: op = SNE; break;
378 case TOK_MUL: op = MUL; break;
379 case TOK_DIV: op = DIV; break;
380 case TOK_DJN: op = DJN; break;
381 case TOK_SLT: op = SLT; break;
382 case TOK_MOD: op = MODM; break;
383 case TOK_NOP: op = NOP; break;
384 case TOK_LDP: op = LDP; break;
385 case TOK_STP: op = STP; break;
386 default:
387 panic_bad_token( tok, "an opcode" );
388 }
389
390 /* Match modifier
391 */
392 s = get_tok( s, &tok ); /* first the '.' */
393 if ( tok != '.' )
394 panic_bad_token( tok, "'.'" );
395
396 s = get_tok( s, &tok ); /* then the modifier itself */
397 if ( ! is_tok_modifier(tok) )
398 panic_bad_token( tok, "a modifier");
399 m = tok - TOK_mF;
400
401 /* Match a-field addressing mode and a-field
402 */
403 s = get_tok( s, &tok );
404 if ( ! is_tok_mode(tok) )
405 panic_bad_token( tok, "an addressing mode specifier");
406 ma = tok - TOK_DIRECT;
407
408 s = get_tok( s, &tok );
409 if ( tok != TOK_INT )
410 panic_bad_token( tok, "an integer");
411 in->a = MODS(tok_int,CORESIZE);
412
413 /* Match comma
414 */
415 s = get_tok( s, &tok );
416 if ( tok != ',' )
417 panic_bad_token( tok, "','" );
418
419 /* Match b-field addressing mode and a-field
420 */
421 s = get_tok( s, &tok );
422 if ( ! is_tok_mode(tok) )
423 panic_bad_token( tok, "an addressing mode specifier");
424 mb = tok - TOK_DIRECT;
425
426 s = get_tok( s, &tok );
427 if ( tok != TOK_INT )
428 panic_bad_token( tok, "an integer");
429 in->b = MODS(tok_int,CORESIZE);
430
431
432 /*
433 * Set flags and ignore the rest of the line
434 */
435 in->in = (flags << flPOS) | OP( op, m, ma, mb );
436 return ASMLINE_OK;
437 }
438
439
440
441
442 /* NAME
443 * asm_file, asm_fname -- assemble a FILE into a warrior
444 *
445 * SYNOPSIS
446 * void asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE );
447 * void asm_fname( const char *filename, warrior_t *w,
448 * unsigned int CORESIZE );
449 *
450 * INPUTS
451 * w -- warrior_t to assemble into.
452 * F -- stream to read warrior source from
453 * filename -- path to source file. May be '-'
454 * which is interpreted as stdin.
455 * CORESIZE -- just that
456 *
457 * DESCRIPTION
458 * These functions assemble a source file into a
459 * warrior_t setting all the non-info fields.
460 *
461 * RESULTS
 * If the warrior assembled correctly, then warrior_t
 * contains its code and starting offset. If an error
 * occurred during assembly, an error message is issued
 * and the program exit()s.
466 *
467 * GLOBALS
468 * none as such, subroutines use tok_buf[], tok_int, str_toks[],
469 * MAXLENGTH constant
470 *
471 * SEE ALSO
472 * asm_line()
473 *
474 * BUGS
 * It's not really acceptable to exit() on an assembly error.
476 */
477 void
asm_file(FILE * F,warrior_t * w,unsigned int CORESIZE)478 asm_file( FILE *F, warrior_t *w, unsigned int CORESIZE )
479 {
480 char line[MAX_ALL_CHARS];
481 insn_t *c;
482 int ret; /* return code from asm_line() */
483
484 w->len = w->start = 0;
485 w->have_pin = 0;
486 w->pin = 0;
487 c = w->code;
488
489 while ( fgets(line, MAX_ALL_CHARS, F) ) {
490 ret = asm_line( line, c, CORESIZE );
491 if ( ret == ASMLINE_DONE ) break;
492
493 switch ( ret ) {
494 case ASMLINE_OK:
495 if ( get_flags( c->in ) & fl_START ) {
496 w->start = w->len;
497 clr_flags( c->in, fl_START );
498 }
499 if ( w->len < MAXLENGTH) c++;
500 w->len++;
501 break;
502
503 case ASMLINE_ORG:
504 w->start = c->a; /* was `ORG int', get the starting address */
505 break;
506
507 case ASMLINE_NONE:
508 break; /* nop */
509
510 case ASMLINE_PIN:
511 w->have_pin = 1;
512 w->pin = c->a; /* save PIN. */
513 break;
514
515 default:
516 fprintf(stderr,"asm.c/asm_file(): illegal return code from asm_line()\n");
517 exit(1);
518 }
519 if ( w->len > MAXLENGTH ) {
520 fprintf(stderr, "too many instructions in warrior %d\n", w->no);
521 exit(1);
522 }
523 }
524 if ( w->start >= w->len ) {
525 fprintf(stderr, "starting address must be inside warrior body\n" );
526 exit(1);
527 }
528 }
529
530
531 void
asm_fname(const char * fname,warrior_t * w,unsigned int CORESIZE)532 asm_fname(const char *fname, warrior_t *w, unsigned int CORESIZE )
533 {
534 FILE *F;
535 int is_stdin = 0;
536
537 if ( strcmp( fname, "-" ) == 0 ) {
538 F = stdin;
539 is_stdin = 1;
540 }
541 else
542 if (!( F = fopen(fname, "r") )) {
543 fprintf(stderr, "can't open file %s\n", fname);
544 exit(1);
545 }
546
547 asm_file(F, w, CORESIZE);
548
549 if ( !is_stdin ) fclose(F);
550 }
551
552
553
554 /* NAME
 *     dis1 -- disassemble an instruction
 *     discore -- disassemble a segment of core
 *
 * SYNOPSIS
 *     void dis1( char *s, insn_t in, unsigned int CORESIZE );
 *     void discore( const insn_t *core, int start, int end,
 *                   unsigned int CORESIZE );
562 *
563 * INPUTS
564 * s -- string to print disassembled instruction to. A string
565 * of length 60 should be more than sufficient.
566 * in -- instruction to disassemble
567 * core -- pointer to start of core
568 * start -- core segment start offset
569 * end -- core segment end offset (excluded)
570 *
571 * RESULTS
572 * dis1 -- The disassembled instruction is printed to `s'.
573 * discore -- A segment of core is dissasembled and printed
574 * to stdout with core addresses.
575 */
576
577 void
dis1(char * buf,insn_t in,unsigned int CORESIZE)578 dis1(char *buf, insn_t in, unsigned int CORESIZE)
579 {
580 int x;
581 char *op_s, *mo_s, *ma_s, *mb_s;
582 int af, bf;
583
584 x = (in.in >> opPOS) & opMASK;
585 switch( x ) {
586 case DAT: op_s = "dat"; break;
587 case SPL: op_s = "spl"; break;
588 case MOV: op_s = "mov"; break;
589 case JMP: op_s = "jmp"; break;
590 case JMZ: op_s = "jmz"; break;
591 case JMN: op_s = "jmn"; break;
592 case ADD: op_s = "add"; break;
593 case SUB: op_s = "sub"; break;
594 case SEQ: op_s = "seq"; break;
595 case SNE: op_s = "sne"; break;
596 case MUL: op_s = "mul"; break;
597 case DIV: op_s = "div"; break;
598 case DJN: op_s = "djn"; break;
599 case SLT: op_s = "slt"; break;
600 case MODM: op_s = "mod"; break;
601 case NOP: op_s = "nop"; break;
602 case LDP: op_s = "ldp"; break;
603 case STP: op_s = "stp"; break;
604 default:
605 op_s = "???";
606 }
607
608 x = (in.in >> moPOS) & moMASK;
609 switch ( x ) {
610 case mF: mo_s = "f "; break;
611 case mA: mo_s = "a "; break;
612 case mB: mo_s = "b "; break;
613 case mAB: mo_s = "ab"; break;
614 case mBA: mo_s = "ba"; break;
615 case mX: mo_s = "x "; break;
616 case mI: mo_s = "i "; break;
617 default:
618 mo_s = "?";
619 }
620
621
622 x = (in.in >> maPOS) & mMASK;
623 switch (x) {
624 case DIRECT: ma_s = "$"; break;
625 case IMMEDIATE: ma_s = "#"; break;
626 case AINDIRECT: ma_s = "*"; break;
627 case BINDIRECT: ma_s = "@"; break;
628 case APREDEC: ma_s = "{"; break;
629 case APOSTINC: ma_s = "}"; break;
630 case BPREDEC: ma_s = "<"; break;
631 case BPOSTINC: ma_s = ">"; break;
632 default: ma_s = "?";
633 }
634
635 x = (in.in >> mbPOS) & mMASK;
636 switch (x) {
637 case DIRECT: mb_s = "$"; break;
638 case IMMEDIATE: mb_s = "#"; break;
639 case AINDIRECT: mb_s = "*"; break;
640 case BINDIRECT: mb_s = "@"; break;
641 case APREDEC: mb_s = "{"; break;
642 case APOSTINC: mb_s = "}"; break;
643 case BPREDEC: mb_s = "<"; break;
644 case BPOSTINC: mb_s = ">"; break;
645 default: mb_s = "?";
646 }
647
648 af = in.a <= CORESIZE/2 ? in.a : in.a - CORESIZE;
649 bf = in.b <= CORESIZE/2 ? in.b : in.b - CORESIZE;
650
651 sprintf(buf,"%s.%s %s%5d , %s%5d", op_s, mo_s, ma_s, af, mb_s, bf);
652 }
653
654
655 void
discore(const insn_t * core,int start,int end,unsigned int CORESIZE)656 discore( const insn_t *core,
657 int start,
658 int end,
659 unsigned int CORESIZE )
660 {
661 int adr;
662 char line[MAX_ALL_CHARS];
663 for ( adr = start; adr < end; adr++ ) {
664 int i = MODS(adr, CORESIZE);
665 dis1( line, core[i], CORESIZE );
666 printf("%4d %s\n", adr, line);
667 }
668 }
669