1#!/usr/bin/env perl
2# by David Conrad
3# This code is licensed under GPLv2 or later; go to gnu.org to read it
4#  (not that it much matters for an asm preprocessor)
5# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
6use strict;
7
8# Apple's gas is ancient and doesn't support modern preprocessing features like
9# .rept and has ugly macro syntax, among other things. Thus, this script
10# implements the subset of the gas preprocessor used by x264 and ffmpeg
11# that isn't supported by Apple's gas.
12
13my @gcc_cmd = @ARGV;
14my @preprocess_c_cmd;
15
16my $fix_unreq = $^O eq "darwin";
17
18if ($gcc_cmd[0] eq "-fix-unreq") {
19    $fix_unreq = 1;
20    shift @gcc_cmd;
21} elsif ($gcc_cmd[0] eq "-no-fix-unreq") {
22    $fix_unreq = 0;
23    shift @gcc_cmd;
24}
25
26if (grep /\.c$/, @gcc_cmd) {
27    # C file (inline asm?) - compile
28    @preprocess_c_cmd = (@gcc_cmd, "-S");
29} elsif (grep /\.[sS]$/, @gcc_cmd) {
30    # asm file, just do C preprocessor
31    @preprocess_c_cmd = (@gcc_cmd, "-E");
32} else {
33    die "Unrecognized input filetype";
34}
35
36# if compiling, avoid creating an output file named '-.o'
37if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
38    foreach my $i (@gcc_cmd) {
39        if ($i =~ /\.[csS]$/) {
40            my $outputfile = $i;
41            $outputfile =~ s/\.[csS]$/.o/;
42            push(@gcc_cmd, "-o");
43            push(@gcc_cmd, $outputfile);
44            last;
45        }
46    }
47}
48@gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
49@preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd;
50
51my $comm;
52
53# detect architecture from gcc binary name
54if      ($gcc_cmd[0] =~ /arm/) {
55    $comm = '@';
56} elsif ($gcc_cmd[0] =~ /powerpc|ppc/) {
57    $comm = '#';
58}
59
60# look for -arch flag
61foreach my $i (1 .. $#gcc_cmd-1) {
62    if ($gcc_cmd[$i] eq "-arch") {
63        if ($gcc_cmd[$i+1] =~ /arm/) {
64            $comm = '@';
65        } elsif ($gcc_cmd[$i+1] =~ /powerpc|ppc/) {
66            $comm = '#';
67        }
68    }
69}
70
71# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
72if (!$comm) {
73    my $native_arch = qx/arch/;
74    if ($native_arch =~ /arm/) {
75        $comm = '@';
76    } elsif ($native_arch =~ /powerpc|ppc/) {
77        $comm = '#';
78    }
79}
80
81if (!$comm) {
82    die "Unable to identify target architecture";
83}
84
85my %ppc_spr = (ctr    => 9,
86               vrsave => 256);
87
88open(ASMFILE, "-|", @preprocess_c_cmd) || die "Error running preprocessor";
89
90my $current_macro = '';
91my $macro_level = 0;
92my %macro_lines;
93my %macro_args;
94my %macro_args_default;
95my $macro_count = 0;
96my $altmacro = 0;
97
98my @pass1_lines;
99my @ifstack;
100
101my %symbols;
102
103# pass 1: parse .macro
104# note that the handling of arguments is probably overly permissive vs. gas
105# but it should be the same for valid cases
106while (<ASMFILE>) {
107    # remove all comments (to avoid interfering with evaluating directives)
108    s/(?<!\\)$comm.*//x;
109
110    # comment out unsupported directives
111    s/\.type/$comm.type/x;
112    s/\.func/$comm.func/x;
113    s/\.endfunc/$comm.endfunc/x;
114    s/\.ltorg/$comm.ltorg/x;
115    s/\.size/$comm.size/x;
116    s/\.fpu/$comm.fpu/x;
117    s/\.arch/$comm.arch/x;
118    s/\.object_arch/$comm.object_arch/x;
119
120    # the syntax for these is a little different
121    s/\.global/.globl/x;
122    # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
123    s/(.*)\.rodata/.const_data/x;
124    s/\.bss/.data/x;
125    s/\.int/.long/x;
126    s/\.float/.single/x;
127
128    # catch unknown section names that aren't mach-o style (with a comma)
129    if (/.section ([^,]*)$/) {
130        die ".section $1 unsupported; figure out the mach-o section name and add it";
131    }
132
133    parse_line($_);
134}
135
136sub eval_expr {
137    my $expr = $_[0];
138    $expr =~ s/([A-Za-z._][A-Za-z0-9._]*)/$symbols{$1}/g;
139    eval $expr;
140}
141
142sub handle_if {
143    my $line = $_[0];
144    # handle .if directives; apple's assembler doesn't support important non-basic ones
145    # evaluating them is also needed to handle recursive macros
146    if ($line =~ /\.if(n?)([a-z]*)\s+(.*)/) {
147        my $result = $1 eq "n";
148        my $type   = $2;
149        my $expr   = $3;
150
151        if ($type eq "b") {
152            $expr =~ s/\s//g;
153            $result ^= $expr eq "";
154        } elsif ($type eq "c") {
155            if ($expr =~ /(.*)\s*,\s*(.*)/) {
156                $result ^= $1 eq $2;
157            } else {
158                die "argument to .ifc not recognized";
159            }
160        } elsif ($type eq "") {
161            $result ^= eval_expr($expr) != 0;
162        } elsif ($type eq "eq") {
163            $result = eval_expr($expr) == 0;
164        } elsif ($type eq "lt") {
165            $result = eval_expr($expr) < 0;
166        } else {
167	    chomp($line);
168            die "unhandled .if varient. \"$line\"";
169        }
170        push (@ifstack, $result);
171        return 1;
172    } else {
173        return 0;
174    }
175}
176
177sub parse_line {
178    my $line = @_[0];
179
180    # evaluate .if blocks
181    if (scalar(@ifstack)) {
182        if (/\.endif/) {
183            pop(@ifstack);
184            return;
185        } elsif ($line =~ /\.elseif\s+(.*)/) {
186            if ($ifstack[-1] == 0) {
187                $ifstack[-1] = !!eval_expr($1);
188            } elsif ($ifstack[-1] > 0) {
189                $ifstack[-1] = -$ifstack[-1];
190            }
191            return;
192        } elsif (/\.else/) {
193            $ifstack[-1] = !$ifstack[-1];
194            return;
195        } elsif (handle_if($line)) {
196            return;
197        }
198
199        # discard lines in false .if blocks
200        foreach my $i (0 .. $#ifstack) {
201            if ($ifstack[$i] <= 0) {
202                return;
203            }
204        }
205    }
206
207    if (/\.macro/) {
208        $macro_level++;
209        if ($macro_level > 1 && !$current_macro) {
210            die "nested macros but we don't have master macro";
211        }
212    } elsif (/\.endm/) {
213        $macro_level--;
214        if ($macro_level < 0) {
215            die "unmatched .endm";
216        } elsif ($macro_level == 0) {
217            $current_macro = '';
218            return;
219        }
220    }
221
222    if ($macro_level > 1) {
223        push(@{$macro_lines{$current_macro}}, $line);
224    } elsif ($macro_level == 0) {
225        expand_macros($line);
226    } else {
227        if ($line =~ /\.macro\s+([\d\w\.]+)\s*(.*)/) {
228            $current_macro = $1;
229
230            # commas in the argument list are optional, so only use whitespace as the separator
231            my $arglist = $2;
232            $arglist =~ s/,/ /g;
233
234            my @args = split(/\s+/, $arglist);
235            foreach my $i (0 .. $#args) {
236                my @argpair = split(/=/, $args[$i]);
237                $macro_args{$current_macro}[$i] = $argpair[0];
238                $argpair[0] =~ s/:vararg$//;
239                $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
240            }
241            # ensure %macro_lines has the macro name added as a key
242            $macro_lines{$current_macro} = [];
243
244        } elsif ($current_macro) {
245            push(@{$macro_lines{$current_macro}}, $line);
246        } else {
247            die "macro level without a macro name";
248        }
249    }
250}
251
252sub expand_macros {
253    my $line = @_[0];
254
255    # handle .if directives; apple's assembler doesn't support important non-basic ones
256    # evaluating them is also needed to handle recursive macros
257    if (handle_if($line)) {
258        return;
259    }
260
261    if (/\.purgem\s+([\d\w\.]+)/) {
262        delete $macro_lines{$1};
263        delete $macro_args{$1};
264        delete $macro_args_default{$1};
265        return;
266    }
267
268    if ($line =~ /\.altmacro/) {
269        $altmacro = 1;
270        return;
271    }
272
273    if ($line =~ /\.noaltmacro/) {
274        $altmacro = 0;
275        return;
276    }
277
278    $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro;
279
280    if ($line =~ /\.set\s+(.*),\s*(.*)/) {
281        $symbols{$1} = eval_expr($2);
282    }
283
284    if ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
285        push(@pass1_lines, $1);
286        my $macro = $2;
287
288        # commas are optional here too, but are syntactically important because
289        # parameters can be blank
290        my @arglist = split(/,/, $3);
291        my @args;
292        my @args_seperator;
293
294        my $comma_sep_required = 0;
295        foreach (@arglist) {
296            # allow arithmetic/shift operators in macro arguments
297            $_ =~ s/\s*(\+|-|\*|\/|<<|>>)\s*/$1/g;
298
299            my @whitespace_split = split(/\s+/, $_);
300            if (!@whitespace_split) {
301                push(@args, '');
302                push(@args_seperator, '');
303            } else {
304                foreach (@whitespace_split) {
305                        #print ("arglist = \"$_\"\n");
306                    if (length($_)) {
307                        push(@args, $_);
308                        my $sep = $comma_sep_required ? "," : " ";
309                        push(@args_seperator, $sep);
310                        #print ("sep = \"$sep\", arg = \"$_\"\n");
311                        $comma_sep_required = 0;
312                    }
313                }
314            }
315
316            $comma_sep_required = 1;
317        }
318
319        my %replacements;
320        if ($macro_args_default{$macro}){
321            %replacements = %{$macro_args_default{$macro}};
322        }
323
324        # construct hashtable of text to replace
325        foreach my $i (0 .. $#args) {
326            my $argname = $macro_args{$macro}[$i];
327            my @macro_args = @{ $macro_args{$macro} };
328            if ($args[$i] =~ m/=/) {
329                # arg=val references the argument name
330                # XXX: I'm not sure what the expected behaviour if a lot of
331                # these are mixed with unnamed args
332                my @named_arg = split(/=/, $args[$i]);
333                $replacements{$named_arg[0]} = $named_arg[1];
334            } elsif ($i > $#{$macro_args{$macro}}) {
335                # more args given than the macro has named args
336                # XXX: is vararg allowed on arguments before the last?
337                $argname = $macro_args{$macro}[-1];
338                if ($argname =~ s/:vararg$//) {
339                    #print "macro = $macro, args[$i] = $args[$i], args_seperator=@args_seperator, argname = $argname, arglist[$i] = $arglist[$i], arglist = @arglist, args=@args, macro_args=@macro_args\n";
340                    #$replacements{$argname} .= ", $args[$i]";
341                    $replacements{$argname} .= "$args_seperator[$i] $args[$i]";
342                } else {
343                    die "Too many arguments to macro $macro";
344                }
345            } else {
346                $argname =~ s/:vararg$//;
347                $replacements{$argname} = $args[$i];
348            }
349        }
350
351        my $count = $macro_count++;
352
353        # apply replacements as regex
354        foreach (@{$macro_lines{$macro}}) {
355            my $macro_line = $_;
356            # do replacements by longest first, this avoids wrong replacement
357            # when argument names are subsets of each other
358            foreach (reverse sort {length $a <=> length $b} keys %replacements) {
359                $macro_line =~ s/\\$_/$replacements{$_}/g;
360            }
361            $macro_line =~ s/\\\@/$count/g;
362            $macro_line =~ s/\\\(\)//g;     # remove \()
363            parse_line($macro_line);
364        }
365    } else {
366        push(@pass1_lines, $line);
367    }
368}
369
370close(ASMFILE) or exit 1;
371open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
372#open(ASMFILE, ">/tmp/a.S") or die "Error running assembler";
373
374my @sections;
375my $num_repts;
376my $rept_lines;
377
378my %literal_labels;     # for ldr <reg>, =<expr>
379my $literal_num = 0;
380
381my $thumb = 0;
382
383my %thumb_labels;
384my %call_targets;
385
386my $in_irp = 0;
387my @irp_args;
388my $irp_param;
389
390# pass 2: parse .rept and .if variants
391# NOTE: since we don't implement a proper parser, using .rept with a
392# variable assigned from .set is not supported
393foreach my $line (@pass1_lines) {
394    # handle .previous (only with regard to .section not .subsection)
395    if ($line =~ /\.(section|text|const_data)/) {
396        push(@sections, $line);
397    } elsif ($line =~ /\.previous/) {
398        if (!$sections[-2]) {
399            die ".previous without a previous section";
400        }
401        $line = $sections[-2];
402        push(@sections, $line);
403    }
404
405    $thumb = 1 if $line =~ /\.code\s+16|\.thumb/;
406    $thumb = 0 if $line =~ /\.code\s+32|\.arm/;
407
408    # handle ldr <reg>, =<expr>
409    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
410        my $label = $literal_labels{$3};
411        if (!$label) {
412            $label = "Literal_$literal_num";
413            $literal_num++;
414            $literal_labels{$3} = $label;
415        }
416        $line = "$1 ldr$2, $label\n";
417    } elsif ($line =~ /\.ltorg/) {
418        $line .= ".align 2\n";
419        foreach my $literal (keys %literal_labels) {
420            $line .= "$literal_labels{$literal}:\n .word $literal\n";
421        }
422        %literal_labels = ();
423    }
424
425    # thumb add with large immediate needs explicit add.w
426    if ($thumb and $line =~ /add\s+.*#([^@]+)/) {
427        $line =~ s/add/add.w/ if eval_expr($1) > 255;
428    }
429
430    # mach-o local symbol names start with L (no dot)
431    $line =~ s/(?<!\w)\.(L\w+)/$1/g;
432
433    if ($thumb and $line =~ /^\s*(\w+)\s*:/) {
434        $thumb_labels{$1}++;
435    }
436
437    if ($line =~ /^\s*((\w+:)?blx?|\.globl)\s+(\w+)/) {
438        $call_targets{$3}++;
439    }
440
441    # @l -> lo16()  @ha -> ha16()
442    $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g;
443    $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g;
444
445    # move to/from SPR
446    if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) {
447        if ($2 eq 'mt') {
448            $line = "$1${2}spr $ppc_spr{$3}, $4\n";
449        } else {
450            $line = "$1${2}spr $4, $ppc_spr{$3}\n";
451        }
452    }
453
454    # old gas versions store upper and lower case names on .req,
455    # but they remove only one on .unreq
456    if ($fix_unreq) {
457        if ($line =~ /\.unreq\s+(.*)/) {
458            $line = ".unreq " . lc($1) . "\n";
459            print ASMFILE ".unreq " . uc($1) . "\n";
460        }
461    }
462
463    if ($line =~ /\.rept\s+(.*)/) {
464        $num_repts = $1;
465        $rept_lines = "\n";
466
467        # handle the possibility of repeating another directive on the same line
468        # .endr on the same line is not valid, I don't know if a non-directive is
469        if ($num_repts =~ s/(\.\w+.*)//) {
470            $rept_lines .= "$1\n";
471        }
472        $num_repts = eval($num_repts);
473    } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) {
474        $in_irp = 1;
475        $num_repts = 1;
476        $rept_lines = "\n";
477        $irp_param = $1;
478
479        # only use whitespace as the separator
480        my $irp_arglist = $2;
481        $irp_arglist =~ s/,/ /g;
482        $irp_arglist =~ s/^\s+//;
483        @irp_args = split(/\s+/, $irp_arglist);
484    } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) {
485        $in_irp = 1;
486        $num_repts = 1;
487        $rept_lines = "\n";
488        $irp_param = $1;
489
490        my $irp_arglist = $2;
491        $irp_arglist =~ s/,/ /g;
492        $irp_arglist =~ s/^\s+//;
493        @irp_args = split(//, $irp_arglist);
494    } elsif ($line =~ /\.endr/) {
495        if ($in_irp != 0) {
496            foreach my $i (@irp_args) {
497                my $line = $rept_lines;
498                $line =~ s/\\$irp_param/$i/g;
499                $line =~ s/\\\(\)//g;     # remove \()
500                print ASMFILE $line;
501            }
502        } else {
503            for (1 .. $num_repts) {
504                print ASMFILE $rept_lines;
505            }
506        }
507        $rept_lines = '';
508        $in_irp = 0;
509        @irp_args = '';
510    } elsif ($rept_lines) {
511        $rept_lines .= $line;
512    } else {
513        print ASMFILE $line;
514    }
515}
516
517print ASMFILE ".text\n";
518print ASMFILE ".align 2\n";
519foreach my $literal (keys %literal_labels) {
520    print ASMFILE "$literal_labels{$literal}:\n .word $literal\n";
521}
522
523map print(ASMFILE ".thumb_func $_\n"),
524    grep exists $thumb_labels{$_}, keys %call_targets;
525
526close(ASMFILE) or exit 1;
527#exit 1
528