1#!/usr/bin/env perl 2# by David Conrad 3# This code is licensed under GPLv2 or later; go to gnu.org to read it 4# (not that it much matters for an asm preprocessor) 5# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc" 6use strict; 7 8# Apple's gas is ancient and doesn't support modern preprocessing features like 9# .rept and has ugly macro syntax, among other things. Thus, this script 10# implements the subset of the gas preprocessor used by x264 and ffmpeg 11# that isn't supported by Apple's gas. 12 13my @gcc_cmd = @ARGV; 14my @preprocess_c_cmd; 15 16my $fix_unreq = $^O eq "darwin"; 17 18if ($gcc_cmd[0] eq "-fix-unreq") { 19 $fix_unreq = 1; 20 shift @gcc_cmd; 21} elsif ($gcc_cmd[0] eq "-no-fix-unreq") { 22 $fix_unreq = 0; 23 shift @gcc_cmd; 24} 25 26if (grep /\.c$/, @gcc_cmd) { 27 # C file (inline asm?) - compile 28 @preprocess_c_cmd = (@gcc_cmd, "-S"); 29} elsif (grep /\.[sS]$/, @gcc_cmd) { 30 # asm file, just do C preprocessor 31 @preprocess_c_cmd = (@gcc_cmd, "-E"); 32} else { 33 die "Unrecognized input filetype"; 34} 35 36# if compiling, avoid creating an output file named '-.o' 37if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) { 38 foreach my $i (@gcc_cmd) { 39 if ($i =~ /\.[csS]$/) { 40 my $outputfile = $i; 41 $outputfile =~ s/\.[csS]$/.o/; 42 push(@gcc_cmd, "-o"); 43 push(@gcc_cmd, $outputfile); 44 last; 45 } 46 } 47} 48@gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd; 49@preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd; 50 51my $comm; 52 53# detect architecture from gcc binary name 54if ($gcc_cmd[0] =~ /arm/) { 55 $comm = '@'; 56} elsif ($gcc_cmd[0] =~ /powerpc|ppc/) { 57 $comm = '#'; 58} 59 60# look for -arch flag 61foreach my $i (1 .. $#gcc_cmd-1) { 62 if ($gcc_cmd[$i] eq "-arch") { 63 if ($gcc_cmd[$i+1] =~ /arm/) { 64 $comm = '@'; 65 } elsif ($gcc_cmd[$i+1] =~ /powerpc|ppc/) { 66 $comm = '#'; 67 } 68 } 69} 70 71# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name 72if (!$comm) { 73 my $native_arch = qx/arch/; 74 if ($native_arch =~ /arm/) { 75 $comm = '@'; 76 } elsif ($native_arch =~ /powerpc|ppc/) { 77 $comm = '#'; 78 } 79} 80 81if (!$comm) { 82 die "Unable to identify target architecture"; 83} 84 85my %ppc_spr = (ctr => 9, 86 vrsave => 256); 87 88open(ASMFILE, "-|", @preprocess_c_cmd) || die "Error running preprocessor"; 89 90my $current_macro = ''; 91my $macro_level = 0; 92my %macro_lines; 93my %macro_args; 94my %macro_args_default; 95my $macro_count = 0; 96my $altmacro = 0; 97 98my @pass1_lines; 99my @ifstack; 100 101my %symbols; 102 103# pass 1: parse .macro 104# note that the handling of arguments is probably overly permissive vs. gas 105# but it should be the same for valid cases 106while (<ASMFILE>) { 107 # remove all comments (to avoid interfering with evaluating directives) 108 s/(?<!\\)$comm.*//x; 109 110 # comment out unsupported directives 111 s/\.type/$comm.type/x; 112 s/\.func/$comm.func/x; 113 s/\.endfunc/$comm.endfunc/x; 114 s/\.ltorg/$comm.ltorg/x; 115 s/\.size/$comm.size/x; 116 s/\.fpu/$comm.fpu/x; 117 s/\.arch/$comm.arch/x; 118 s/\.object_arch/$comm.object_arch/x; 119 120 # the syntax for these is a little different 121 s/\.global/.globl/x; 122 # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const 123 s/(.*)\.rodata/.const_data/x; 124 s/\.bss/.data/x; 125 s/\.int/.long/x; 126 s/\.float/.single/x; 127 128 # catch unknown section names that aren't mach-o style (with a comma) 129 if (/.section ([^,]*)$/) { 130 die ".section $1 unsupported; figure out the mach-o section name and add it"; 131 } 132 133 parse_line($_); 134} 135 136sub eval_expr { 137 my $expr = $_[0]; 138 $expr =~ s/([A-Za-z._][A-Za-z0-9._]*)/$symbols{$1}/g; 139 eval $expr; 140} 141 142sub handle_if { 143 my $line = $_[0]; 144 # handle .if directives; apple's assembler doesn't support important non-basic ones 145 # evaluating them is also needed to handle recursive macros 146 if ($line =~ /\.if(n?)([a-z]*)\s+(.*)/) { 147 my $result = $1 eq "n"; 148 my $type = $2; 149 my $expr = $3; 150 151 if ($type eq "b") { 152 $expr =~ s/\s//g; 153 $result ^= $expr eq ""; 154 } elsif ($type eq "c") { 155 if ($expr =~ /(.*)\s*,\s*(.*)/) { 156 $result ^= $1 eq $2; 157 } else { 158 die "argument to .ifc not recognized"; 159 } 160 } elsif ($type eq "") { 161 $result ^= eval_expr($expr) != 0; 162 } elsif ($type eq "eq") { 163 $result = eval_expr($expr) == 0; 164 } elsif ($type eq "lt") { 165 $result = eval_expr($expr) < 0; 166 } else { 167 chomp($line); 168 die "unhandled .if varient. \"$line\""; 169 } 170 push (@ifstack, $result); 171 return 1; 172 } else { 173 return 0; 174 } 175} 176 177sub parse_line { 178 my $line = @_[0]; 179 180 # evaluate .if blocks 181 if (scalar(@ifstack)) { 182 if (/\.endif/) { 183 pop(@ifstack); 184 return; 185 } elsif ($line =~ /\.elseif\s+(.*)/) { 186 if ($ifstack[-1] == 0) { 187 $ifstack[-1] = !!eval_expr($1); 188 } elsif ($ifstack[-1] > 0) { 189 $ifstack[-1] = -$ifstack[-1]; 190 } 191 return; 192 } elsif (/\.else/) { 193 $ifstack[-1] = !$ifstack[-1]; 194 return; 195 } elsif (handle_if($line)) { 196 return; 197 } 198 199 # discard lines in false .if blocks 200 foreach my $i (0 .. $#ifstack) { 201 if ($ifstack[$i] <= 0) { 202 return; 203 } 204 } 205 } 206 207 if (/\.macro/) { 208 $macro_level++; 209 if ($macro_level > 1 && !$current_macro) { 210 die "nested macros but we don't have master macro"; 211 } 212 } elsif (/\.endm/) { 213 $macro_level--; 214 if ($macro_level < 0) { 215 die "unmatched .endm"; 216 } elsif ($macro_level == 0) { 217 $current_macro = ''; 218 return; 219 } 220 } 221 222 if ($macro_level > 1) { 223 push(@{$macro_lines{$current_macro}}, $line); 224 } elsif ($macro_level == 0) { 225 expand_macros($line); 226 } else { 227 if ($line =~ /\.macro\s+([\d\w\.]+)\s*(.*)/) { 228 $current_macro = $1; 229 230 # commas in the argument list are optional, so only use whitespace as the separator 231 my $arglist = $2; 232 $arglist =~ s/,/ /g; 233 234 my @args = split(/\s+/, $arglist); 235 foreach my $i (0 .. $#args) { 236 my @argpair = split(/=/, $args[$i]); 237 $macro_args{$current_macro}[$i] = $argpair[0]; 238 $argpair[0] =~ s/:vararg$//; 239 $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1]; 240 } 241 # ensure %macro_lines has the macro name added as a key 242 $macro_lines{$current_macro} = []; 243 244 } elsif ($current_macro) { 245 push(@{$macro_lines{$current_macro}}, $line); 246 } else { 247 die "macro level without a macro name"; 248 } 249 } 250} 251 252sub expand_macros { 253 my $line = @_[0]; 254 255 # handle .if directives; apple's assembler doesn't support important non-basic ones 256 # evaluating them is also needed to handle recursive macros 257 if (handle_if($line)) { 258 return; 259 } 260 261 if (/\.purgem\s+([\d\w\.]+)/) { 262 delete $macro_lines{$1}; 263 delete $macro_args{$1}; 264 delete $macro_args_default{$1}; 265 return; 266 } 267 268 if ($line =~ /\.altmacro/) { 269 $altmacro = 1; 270 return; 271 } 272 273 if ($line =~ /\.noaltmacro/) { 274 $altmacro = 0; 275 return; 276 } 277 278 $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro; 279 280 if ($line =~ /\.set\s+(.*),\s*(.*)/) { 281 $symbols{$1} = eval_expr($2); 282 } 283 284 if ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) { 285 push(@pass1_lines, $1); 286 my $macro = $2; 287 288 # commas are optional here too, but are syntactically important because 289 # parameters can be blank 290 my @arglist = split(/,/, $3); 291 my @args; 292 my @args_seperator; 293 294 my $comma_sep_required = 0; 295 foreach (@arglist) { 296 # allow arithmetic/shift operators in macro arguments 297 $_ =~ s/\s*(\+|-|\*|\/|<<|>>)\s*/$1/g; 298 299 my @whitespace_split = split(/\s+/, $_); 300 if (!@whitespace_split) { 301 push(@args, ''); 302 push(@args_seperator, ''); 303 } else { 304 foreach (@whitespace_split) { 305 #print ("arglist = \"$_\"\n"); 306 if (length($_)) { 307 push(@args, $_); 308 my $sep = $comma_sep_required ? "," : " "; 309 push(@args_seperator, $sep); 310 #print ("sep = \"$sep\", arg = \"$_\"\n"); 311 $comma_sep_required = 0; 312 } 313 } 314 } 315 316 $comma_sep_required = 1; 317 } 318 319 my %replacements; 320 if ($macro_args_default{$macro}){ 321 %replacements = %{$macro_args_default{$macro}}; 322 } 323 324 # construct hashtable of text to replace 325 foreach my $i (0 .. $#args) { 326 my $argname = $macro_args{$macro}[$i]; 327 my @macro_args = @{ $macro_args{$macro} }; 328 if ($args[$i] =~ m/=/) { 329 # arg=val references the argument name 330 # XXX: I'm not sure what the expected behaviour if a lot of 331 # these are mixed with unnamed args 332 my @named_arg = split(/=/, $args[$i]); 333 $replacements{$named_arg[0]} = $named_arg[1]; 334 } elsif ($i > $#{$macro_args{$macro}}) { 335 # more args given than the macro has named args 336 # XXX: is vararg allowed on arguments before the last? 337 $argname = $macro_args{$macro}[-1]; 338 if ($argname =~ s/:vararg$//) { 339 #print "macro = $macro, args[$i] = $args[$i], args_seperator=@args_seperator, argname = $argname, arglist[$i] = $arglist[$i], arglist = @arglist, args=@args, macro_args=@macro_args\n"; 340 #$replacements{$argname} .= ", $args[$i]"; 341 $replacements{$argname} .= "$args_seperator[$i] $args[$i]"; 342 } else { 343 die "Too many arguments to macro $macro"; 344 } 345 } else { 346 $argname =~ s/:vararg$//; 347 $replacements{$argname} = $args[$i]; 348 } 349 } 350 351 my $count = $macro_count++; 352 353 # apply replacements as regex 354 foreach (@{$macro_lines{$macro}}) { 355 my $macro_line = $_; 356 # do replacements by longest first, this avoids wrong replacement 357 # when argument names are subsets of each other 358 foreach (reverse sort {length $a <=> length $b} keys %replacements) { 359 $macro_line =~ s/\\$_/$replacements{$_}/g; 360 } 361 $macro_line =~ s/\\\@/$count/g; 362 $macro_line =~ s/\\\(\)//g; # remove \() 363 parse_line($macro_line); 364 } 365 } else { 366 push(@pass1_lines, $line); 367 } 368} 369 370close(ASMFILE) or exit 1; 371open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler"; 372#open(ASMFILE, ">/tmp/a.S") or die "Error running assembler"; 373 374my @sections; 375my $num_repts; 376my $rept_lines; 377 378my %literal_labels; # for ldr <reg>, =<expr> 379my $literal_num = 0; 380 381my $thumb = 0; 382 383my %thumb_labels; 384my %call_targets; 385 386my $in_irp = 0; 387my @irp_args; 388my $irp_param; 389 390# pass 2: parse .rept and .if variants 391# NOTE: since we don't implement a proper parser, using .rept with a 392# variable assigned from .set is not supported 393foreach my $line (@pass1_lines) { 394 # handle .previous (only with regard to .section not .subsection) 395 if ($line =~ /\.(section|text|const_data)/) { 396 push(@sections, $line); 397 } elsif ($line =~ /\.previous/) { 398 if (!$sections[-2]) { 399 die ".previous without a previous section"; 400 } 401 $line = $sections[-2]; 402 push(@sections, $line); 403 } 404 405 $thumb = 1 if $line =~ /\.code\s+16|\.thumb/; 406 $thumb = 0 if $line =~ /\.code\s+32|\.arm/; 407 408 # handle ldr <reg>, =<expr> 409 if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) { 410 my $label = $literal_labels{$3}; 411 if (!$label) { 412 $label = "Literal_$literal_num"; 413 $literal_num++; 414 $literal_labels{$3} = $label; 415 } 416 $line = "$1 ldr$2, $label\n"; 417 } elsif ($line =~ /\.ltorg/) { 418 $line .= ".align 2\n"; 419 foreach my $literal (keys %literal_labels) { 420 $line .= "$literal_labels{$literal}:\n .word $literal\n"; 421 } 422 %literal_labels = (); 423 } 424 425 # thumb add with large immediate needs explicit add.w 426 if ($thumb and $line =~ /add\s+.*#([^@]+)/) { 427 $line =~ s/add/add.w/ if eval_expr($1) > 255; 428 } 429 430 # mach-o local symbol names start with L (no dot) 431 $line =~ s/(?<!\w)\.(L\w+)/$1/g; 432 433 if ($thumb and $line =~ /^\s*(\w+)\s*:/) { 434 $thumb_labels{$1}++; 435 } 436 437 if ($line =~ /^\s*((\w+:)?blx?|\.globl)\s+(\w+)/) { 438 $call_targets{$3}++; 439 } 440 441 # @l -> lo16() @ha -> ha16() 442 $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g; 443 $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g; 444 445 # move to/from SPR 446 if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) { 447 if ($2 eq 'mt') { 448 $line = "$1${2}spr $ppc_spr{$3}, $4\n"; 449 } else { 450 $line = "$1${2}spr $4, $ppc_spr{$3}\n"; 451 } 452 } 453 454 # old gas versions store upper and lower case names on .req, 455 # but they remove only one on .unreq 456 if ($fix_unreq) { 457 if ($line =~ /\.unreq\s+(.*)/) { 458 $line = ".unreq " . lc($1) . "\n"; 459 print ASMFILE ".unreq " . uc($1) . "\n"; 460 } 461 } 462 463 if ($line =~ /\.rept\s+(.*)/) { 464 $num_repts = $1; 465 $rept_lines = "\n"; 466 467 # handle the possibility of repeating another directive on the same line 468 # .endr on the same line is not valid, I don't know if a non-directive is 469 if ($num_repts =~ s/(\.\w+.*)//) { 470 $rept_lines .= "$1\n"; 471 } 472 $num_repts = eval($num_repts); 473 } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) { 474 $in_irp = 1; 475 $num_repts = 1; 476 $rept_lines = "\n"; 477 $irp_param = $1; 478 479 # only use whitespace as the separator 480 my $irp_arglist = $2; 481 $irp_arglist =~ s/,/ /g; 482 $irp_arglist =~ s/^\s+//; 483 @irp_args = split(/\s+/, $irp_arglist); 484 } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) { 485 $in_irp = 1; 486 $num_repts = 1; 487 $rept_lines = "\n"; 488 $irp_param = $1; 489 490 my $irp_arglist = $2; 491 $irp_arglist =~ s/,/ /g; 492 $irp_arglist =~ s/^\s+//; 493 @irp_args = split(//, $irp_arglist); 494 } elsif ($line =~ /\.endr/) { 495 if ($in_irp != 0) { 496 foreach my $i (@irp_args) { 497 my $line = $rept_lines; 498 $line =~ s/\\$irp_param/$i/g; 499 $line =~ s/\\\(\)//g; # remove \() 500 print ASMFILE $line; 501 } 502 } else { 503 for (1 .. $num_repts) { 504 print ASMFILE $rept_lines; 505 } 506 } 507 $rept_lines = ''; 508 $in_irp = 0; 509 @irp_args = ''; 510 } elsif ($rept_lines) { 511 $rept_lines .= $line; 512 } else { 513 print ASMFILE $line; 514 } 515} 516 517print ASMFILE ".text\n"; 518print ASMFILE ".align 2\n"; 519foreach my $literal (keys %literal_labels) { 520 print ASMFILE "$literal_labels{$literal}:\n .word $literal\n"; 521} 522 523map print(ASMFILE ".thumb_func $_\n"), 524 grep exists $thumb_labels{$_}, keys %call_targets; 525 526close(ASMFILE) or exit 1; 527#exit 1 528