1#!/usr/local/bin/perl
2
3# Z88DK Z80 Macro Assembler
4#
5# Copyright (C) Paulo Custodio, 2011-2019
6# License: The Artistic License 2.0, http://www.perlfoundation.org/artistic_license_2_0
7#
8# Preprocessor that translates z80asm source code for CP/M's Z80MR, generates .i file with
9# standard Z80 asm code and calls z80asm. Any error message is mapped back to the original
10# source file line.
11#
12# Added features:
13# - Assembly MACROs with named parameters and optional local symbols: MACRO .. LOCAL xx .. ENDM
14# - DEFL to redefine a symbol, maybe using the previous value
15# - EQU to define a symbol, translated to DEFC
16# - Label fields start on column 1 and don't need dot-prefix or colon-suffix
17# - END ends the assembly
18# - DW as synonym to DEFW
19# - DDB to back word in big-endian form
20# - DB, DEFM, DATA as synonym to DEFB
21# - DS as synonym to DEFS
22# - *INCLUDE to include files
23# - new expression operators: .AND. .OR. .XOR. .NOT. .SHR. .SHL. .HIGH. .LOW. .EQU. .GT. .LT.
24# - optional upper case all source before assembly
25
26use strict;
27use warnings;
28use Capture::Tiny 'capture';
29use File::Basename;
30use IO::File;
31use File::Spec;
32use Iterator::Simple qw( iter ienumerate iflatten imap igrep );
33use Iterator::Simple::Lookahead;
34use Regexp::Common;
35use FindBin;
36use Data::Dump 'dump';
37
38#------------------------------------------------------------------------------
39# Globals
40#------------------------------------------------------------------------------
41our @OPTIONS;		# list of options to pass to z80asm
42our %DEFINE;		# list of -D defines from command line, or DEFL vars
43our %MACRO;			# macros { args, local, lines }
44our %DEFL;			# variable-value macros
45our $DEFL_RE;		# match any DEFL name
46our $UCASE;			# if true all text is capitalized on reading from file
47
48our $NAME_RE =
49	qr/ [_a-z]  \w* /ix;
50our $MACRO_PARAM_RE =
51	qr/ [_a-z#] \w* /ix;
52our $LABEL_RE =
53	qr/ (?| ^ 		     (?<label> $NAME_RE) (?: \s+ | \s* : \s* )
54		  | ^ \s* \. \s* (?<label> $NAME_RE) \s+
55		  | ^ \s*        (?<label> $NAME_RE) \s* : \s*
56		  ) /ix;
57our $OPT_LABEL_RE =
58	qr/ ^ (?<label_field> $LABEL_RE | \s+ ) /ix;
59our $QSTR_RE =
60	qr/ (?| ' (?<str> [^']* ) '
61		  | " (?<str> [^"]* ) "
62		  ) /ix;
63our $QFILE_RE =
64	qr/ (?| ' ( [^']+ ) '
65		  | " ( [^"]+ ) "
66		  | < ( [^>]+ ) >
67		  |   ( \S+   )
68		  ) /ix;
69
70our $EXPR_RE =
71	qr/	\s* (?&EXPR)
72
73		(?(DEFINE)
74			(?<TERM>	\s*
75						(?> \d+
76						|   \w+
77						|   \$
78						|   \( \s* (?&EXPR) \s* \)
79						)
80			)
81			(?<UN_OP>	\s*
82						[\-\+\!\~] )
83			(?<BIN_OP>	\s*
84						(?: << | >>
85						  | >= | <= | == | <> | !=
86						  | \&\& | \|\|
87						  | \*\*
88						  |	[\-\+\*\/\%\&\|\^]
89						)
90			)
91			(?<FACTOR>	\s* (?&UN_OP)*
92						\s* (?&TERM)
93			)
94			(?<EXPR>	\s*	(?&FACTOR)
95						(?> \s* (?&BIN_OP)
96							\s* (?&FACTOR)
97						)*
98			)
99		)
100	  /ix;
101
102#------------------------------------------------------------------------------
103# Handle include path
104#------------------------------------------------------------------------------
105sub add_path {
106	my(@dirs) = @_;
107	our @INC_PATH;
108
109	push @INC_PATH, @dirs;
110}
111
112sub search_path {
113	my($file) = @_;
114	our @INC_PATH;
115
116	return $file if -f $file;	# found
117	for my $dir (@INC_PATH) {
118		my $path = File::Spec->catfile($dir, $file);
119		return $path if -f $path;
120	}
121
122	die "File $file not found in path (@INC_PATH)\n";
123}
124
125#------------------------------------------------------------------------------
126# Handle defines
127#------------------------------------------------------------------------------
128sub add_define {
129	my($name, $value) = @_;
130	$DEFINE{$name} = $value || 1;
131}
132
133#------------------------------------------------------------------------------
134# errors
135#------------------------------------------------------------------------------
136sub error {
137	my($line, $message) = @_;
138	die "Error at file ", $line->{file}, " line ", $line->{line_nr},
139		": ", $message, "\n";
140}
141
142#------------------------------------------------------------------------------
143# autolabel
144#------------------------------------------------------------------------------
145sub autolabel {
146	my($template) = @_;
147	our $LABEL_NUM;
148	$LABEL_NUM++;
149	$template =~ s/\W//g;
150	return "AUTOLABEL_".$template."_".$LABEL_NUM;
151}
152
153#------------------------------------------------------------------------------
154# expressions
155#------------------------------------------------------------------------------
156sub eval_expr {
157	my($expr) = @_;
158
159	# try to eval as arithmetic expression
160	use integer;
161	my $new_value = eval("0+($expr)");
162	if (! $@) {		# ok
163		return $new_value;
164	}
165	else {
166		return $expr;
167	}
168}
169
170sub high_expr {
171	my($arg) = @_;
172	return eval_expr("((($arg) >> 8) & 255)");
173}
174
175sub low_expr {
176	my($arg) = @_;
177	return eval_expr("(($arg) & 255)");
178}
179
180#------------------------------------------------------------------------------
181# macro utilities
182#------------------------------------------------------------------------------
183sub extract_macro_params {
184	my($text, $line) = @_;
185	my @params = split(/,/, $text);
186	for (@params) {
187		s/^\s+//;
188		s/\s+$//;
189		/^ $MACRO_PARAM_RE $/ix
190			or error($line, "invalid macro parameter: $_");
191	}
192	return @params;
193}
194
195sub parse_macro_args {
196	my($args) = @_;
197	$args =~ s/^\s+//;
198	$args =~ s/\s+$//;
199	return () if $args eq '';
200
201	my @values = ('');
202	while (! ($args =~ /\G $ /gcx)) {
203		if ( $args =~ /\G ( $QSTR_RE ) /gcx) {
204			$values[-1] .= $1;
205		}
206		elsif ( $args =~ /\G \s* , \s* /gcx) {
207			push @values, '';
208		}
209		elsif ( $args =~ /\G ( . ) /gcxs) {
210			$values[-1] .= $1;
211		}
212		else {
213			die;
214		}
215	}
216
217	# unquote quoted macro arguments
218	for (@values) {
219		if (/^ $QSTR_RE $/ix) {
220			$_ = $+{str};
221		}
222	}
223
224	return @values;
225}
226
227sub expand_macro {
228	my($call_line, $label, $name, $args) = @_;
229	my @ret;
230
231	my $macro = $MACRO{uc($name)} or die;
232	my %line = %$call_line;
233
234	# copy label
235	if ($label) {
236		$line{text} = "$label:";
237		push @ret, {%line};
238	}
239
240	# expand macro
241	my @values = parse_macro_args($args);
242	my $text = join("\n", @{$macro->{lines}});
243
244	for my $local (@{$macro->{local}}) {
245		my $autolabel = autolabel($local);
246		$text =~ s/$local/$autolabel/ig;
247	}
248
249	for my $arg (@{$macro->{args}}) {
250		my $value = shift(@values) // '';
251		$text =~ s/$arg/$value/ig;
252	}
253
254	error($call_line, "extra macro arguments") if @values;
255
256	for (split(/\n/, $text)) {
257		$line{text} = $_;
258		push @ret, {%line};
259	}
260
261	return iter(\@ret);
262}
263
264#------------------------------------------------------------------------------
265# DEFL utilities
266#------------------------------------------------------------------------------
267sub define_defl {
268	my($name, $expr) = @_;
269	$expr =~ s/^\s+//;
270	$expr =~ s/\s+$//;
271
272	my $old_value = $DEFL{uc($name)} || 0;
273
274	# use old value
275	$expr =~ s/ \b $name \b /($old_value)/gix;
276
277	# try to eval as arithmetic expression
278	$expr = eval_expr($expr);
279
280	# store
281	$DEFL{uc($name)} = $expr;
282	my $re = join("|", keys %DEFL);
283	$DEFL_RE = qr/ \b ( $re ) \b /ix;
284}
285
286#------------------------------------------------------------------------------
287# read parsed lines - stack of iterators
288#------------------------------------------------------------------------------
289sub read_lines_it {
290	my($file) = @_;
291	return
292		remove_blank_lines(
293		parse_directives_it(
294		expand_defl_it(
295		define_asmpc_it(
296		convert_expr_it(
297		expand_macros_it(
298		parse_macros_it(
299		remove_comments_it(
300		convert_ucase_it(
301		parse_include_it(
302		add_label_suffix(
303		read_file_it($file))))))))))));
304}
305
306# read lines from file { text, file, line_nr }, text is chompped
307sub read_file_it {
308	my($file) = @_;
309	my $path = search_path($file);
310	return
311		imap { {text => $_->[1], file => $path, line_nr => 1+$_->[0]} }
312	    ienumerate
313		imap { s/\s+$//; $_ }
314		iter( IO::File->new($path) );
315}
316
317# add ':' after label names
318sub add_label_suffix {
319	my($in) = @_;
320	return
321		imap {
322			for ($_->{text}) {
323				if ( $_ =~ /^\s*(IF|IFDEF|IFNDEF|ELSE|ENDIF)/i ) { next; }
324				s/^(\w+)\s+(\w+)/$1: $2/;
325				s/^(\w+)\s*$/$1:/;
326			}
327			$_;
328		}
329		$in;
330}
331
332# parse INCLUDE
333sub parse_include_it {
334	my($in) = @_;
335	return
336		iflatten
337		sub {
338			defined(my $line = <$in>) or return;
339			if ( $line->{text} =~
340				/^ [\#\*]? \s* INCLUDE \s+ $QFILE_RE /ix ) {
341				return read_file_it($1);
342			}
343			return $line;
344		};
345}
346
347# remove comments
348sub remove_comments_it {
349	my($in) = @_;
350	return
351		imap {
352			for ($_->{text}) {
353				s/^\s*;.*//;
354#				s/^\s*\#.*//;
355				s/ (?:  (?<af1>		af\'		)
356				   |	(?<qstr>	$QSTR_RE	)
357				   |	(?<comment>	\s* ; .*	)
358				   |    (?<any>		.	    	)
359				   )
360				 / defined($+{af1}) 			? $+{af1}
361				 : defined($+{qstr}) 			? $+{qstr}
362				 : defined($+{any})				? $+{any}
363				 : ""
364				 /egsxi;
365				s/\s+$//;
366			}
367			$_;
368		}
369		$in;
370}
371
372# parse macro .. endm
373sub parse_macros_it {
374	my($in) = @_;
375	return iter sub {
376		while (1) {
377			defined(my $line = <$in>) or return;
378			if ($line->{text} =~
379				/^ $LABEL_RE \b MACRO \b (?<args> .*)/ix) {
380				# get NAME and ARGS
381				my $name = $+{label};
382				my @args = extract_macro_params($+{args}, $line);
383
384				# search for LOCAL and ENDM, collect lines
385				my @lines;
386				my @local;
387				while (1) {
388					defined(my $macro_line = <$in>)
389						or error($line, "missing ENDM");
390
391					last if $macro_line->{text} =~ /^ \s+ ENDM \b /ix;
392
393					if ($macro_line->{text} =~
394						/^ \s+ LOCAL \b (?<args> .*)/ix) {
395						push @local, extract_macro_params($+{args}, $macro_line);
396					}
397					else {
398						push @lines, $macro_line->{text};
399					}
400				}
401
402				# save macro
403				$MACRO{uc($name)} and error($line, "macro multiply defined");
404				$MACRO{uc($name)} = {
405					args 	=> \@args,
406					local 	=> \@local,
407					lines	=> \@lines,
408				};
409			}
410			else {
411				return $line;
412			}
413		}
414	}
415}
416
417# expand macros
418sub expand_macros_it {
419	my($in) = @_;
420	return
421		iflatten
422		imap {
423			if ( $_->{text} =~
424				 /^ $OPT_LABEL_RE \b
425				    (?<name> $NAME_RE) \b
426				    (?<args> .*) $/ix &&
427			     defined( $MACRO{ uc( $+{name} ) } ) ) {
428				return expand_macro($_, $+{label}, $+{name}, $+{args});
429			}
430			else {
431				return $_;
432			}
433		}
434		$in;
435}
436
437# convert expression to z80asm format:
438# - convert strings to lists of character codes
439# - numbers to decimal
440# - Z80MR operators to C-standard operators
441sub convert_expr_it {
442	my($in) = @_;
443	return imap {
444		for ($_->{text}) {
445			if ( $_ =~ /^\s*BINARY\s*/i ) {
446				# Skip 'binary' directive.
447				next;
448			}
449			s{ [\%\@] ( [\'\"] ) (?<str> [\-\#]+ ) \g{-2}
450			 }{ oct('0b'.join('',
451							  map {$_ eq '#' ? '1' : '0'}
452							  split(//, $+{str} ) ) )
453			  }egxi;
454			s{ $QSTR_RE }{ join(",", map {ord} split(//, $+{str})) }egxi;
455			s/ (?| \b   ( \d [0-9A-F]* ) h \b
456				 | \$   (    [0-9A-F]+ ) \b
457				 | \#   (    [0-9A-F]+ ) \b
458				 | \&h  (    [0-9A-F]+ ) \b
459				 | 0x   (    [0-9A-F]+ ) \b
460				 ) / hex($1) /egxi;
461			s/ (?| \b   (    [01]+ ) b \b
462				 | \%   (    [01]+ ) \b
463				 | \@   (    [01]+ ) \b
464				 | \&b  (    [01]+ ) \b
465				 | 0b   (    [01]+ ) \b
466				 ) / oct("0b".$1) /egxi;
467			s/ \. AND \. / & /gxi;
468			s/ \. OR  \. / | /gxi;
469			s/ \. XOR \. / ^ /gxi;
470			s/ \. NOT \. / ! /gxi;
471			s/ \. SHR \. / >> /gxi;
472			s/ \. SHL \. / << /gxi;
473			s/ \. EQU \. / == /gxi;
474			s/ \. GT  \. / > /gxi;
475			s/ \. LT  \. / < /gxi;
476			s/ \. HIGH \. \s* ( $EXPR_RE ) / '('.high_expr($1).')' /egxi;
477			s/ \. LOW  \. \s* ( $EXPR_RE ) / '('.low_expr($1).')'  /egxi;
478		}
479		$_;
480	} $in;
481}
482
483# replace $ and ASMPC by newly generated autolabel
484sub define_asmpc_it {
485	my($in) = @_;
486	return
487		iflatten
488		imap {
489			if ($_->{text} =~ / \$ | \b ASMPC \b /ix) {
490				my @ret;
491				my $label = autolabel("pc");
492				$_->{text} =~ s/ \$ | \b ASMPC \b / $label /gix;
493
494				my %line = %$_;
495				$line{text} = "$label:";
496				push @ret, { %$_, text => "$label:" };
497				push @ret, { %$_ };
498				return iter(\@ret);
499			}
500			else {
501				return $_;
502			}
503		}
504		$in;
505}
506
507# expand LABEL DEFL VALUE replacing all occurences of LABEL by VALUE
508# Note: hides z80asm's DEFL for define long
509sub expand_defl_it {
510	my($in) = @_;
511	return
512		imap {
513			if ($_->{text} =~
514				/^ $LABEL_RE \b DEFL \b \s* (?<expr> .*)/ix) {
515				define_defl( $+{label}, $+{expr} );
516				$_->{text} = "";
517			}
518			elsif (%DEFL) {
519				$_->{text} =~ s/ \b ( $DEFL_RE ) \b /$DEFL{uc($1)}/gix;
520			}
521			$_;
522		}
523		$in;
524}
525
526# parse assembly directives, replace with z80asm version
527sub parse_directives_it {
528	my($in) = @_;
529	return iter sub {
530		while (1) {
531			defined(my $line = <$in>) or return;
532
533			if ($line->{text} =~
534				/^ $OPT_LABEL_RE \b END \b/ix) {
535				# END: ignore rest of input
536				1 while (defined($line = <$in>));
537			}
538			elsif ($line->{text} =~
539				/^ $OPT_LABEL_RE \b DDB \b \s* (?<args> .*)/ix) {
540				# DDB: words with MSB first
541				my $label_field = $+{label_field};
542				my @args = split(/\s*,\s*/, $+{args});
543				my @bytes;
544				for (@args) {
545					push @bytes, high_expr($_), low_expr($_);
546				}
547				$line->{text} = $label_field."DEFB ".join(",", @bytes);
548			}
549			else {
550				for ($line->{text}) {
551					s/^ ( $OPT_LABEL_RE ) DW \b /${1}DEFW/ix;
552					s/^ ( $OPT_LABEL_RE ) ( DB | DEFM | DATA ) \b /${1}DEFB/ix;
553					s/^ ( $OPT_LABEL_RE ) DS \b /${1}DEFS/ix;
554					s/^ $LABEL_RE \b EQU \b \s* (?<args> .*) /
555						"\tDEFC ".$+{label}." = ".eval_expr($+{args}) /eix;
556				}
557			}
558
559			return $line;
560		}
561	};
562}
563
564# capitalize code if --ucase
565sub convert_ucase_it {
566	my($in) = @_;
567	if ($UCASE) {
568		return imap { $_->{text} = uc($_->{text}); $_ } $in;
569	}
570	else {
571		return $in;
572	}
573}
574
575# remove blank lines
576sub remove_blank_lines {
577	my($in) = @_;
578	return
579		igrep { $_->{text} =~ /\S/ }
580		$in;
581}
582
583#------------------------------------------------------------------------------
584# assemble the source file
585#------------------------------------------------------------------------------
586sub assemble_file {
587	my($src_file) = @_;
588	my $it = read_lines_it($src_file);
589
590	# build .i file and line map for error messages
591	my $i_file = $src_file;	$i_file =~ s/\.\w+$/.i/;
592
593	my @line_map;
594	my $line_nr;
595	open(my $fh, ">", $i_file) or die "write $i_file: $!";
596	my $last_line = "";
597	while (defined(my $in = <$it>)) {
598		my $this_line = ";;".$in->{file}.":".$in->{line_nr}."\n";
599		if ($this_line ne $last_line) {
600			$line_nr++;
601			print $fh $this_line;
602			$last_line = $this_line;
603		}
604
605		$line_nr++;
606		print $fh $in->{text}, "\n";
607		$line_map[$line_nr] = $in;
608	}
609	close $fh;
610
611	# assemble, translate error messages
612	my @cmd = ('z80asm', @OPTIONS, $i_file);
613	print "@cmd\n";
614	$cmd[0] = $FindBin::Bin.'/z80asm';
615	my ($stdout, $stderr, $exit) = capture {
616		system @cmd;
617	};
618
619	$stderr =~ s/(at file ')([^']+)(' line )(\d+)/
620				 $1 . $line_map[$4]{file} . $3 . $line_map[$4]{line_nr} /ge;
621	print $stdout;
622	print STDERR $stderr;
623
624	exit 1 if $exit != 0;
625}
626
627#------------------------------------------------------------------------------
628while (@ARGV && $ARGV[0] =~ /^-/) {
629	local $_ = shift;
630	if    (/^-I(.*)/ ) {					add_path($1); }
631	elsif (/^-D($NAME_RE)(?:=(.*))?/ ) {	define_defl(uc($1), $2 || 1); }
632	elsif (/^--ucase$/ ) {					$UCASE = 1; }
633	else {									push @OPTIONS, $_; }
634}
635
636@ARGV or die "Usage: ", basename($0), " [-Ipath][-Dvar[=value]] FILE...\n";
637assemble_file($_) for @ARGV;
638exit 0;
639