1#!/usr/bin/perl 2# 3# NAME 4# interfuzz.pl -- C-INTERCAL optimiser fuzz-tester 5# 6# LICENSE TERMS 7# Copyright (C) 2010 Alex Smith 8# 9# This program is free software; you can redistribute it and/or modify 10# it under the terms of the GNU General Public License as published by 11# the Free Software Foundation; either version 2 of the License, or 12# (at your option) any later version. 13# 14# This program is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17# GNU General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with this program; if not, write to the Free Software 21# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22# 23 24use strict; 25use warnings; 26use File::Temp qw/tempdir/; 27use File::Spec; 28 29use constant fuzzamount => 1000; 30use constant ickpath => $ENV{ICK} || '../build/ick'; 31use constant idiompath => $ENV{IDIOTISM} || '../src/idiotism.oil'; 32 33# Before getting to the main program, we need to read in idiom LHSes. 34print STDERR "Loading idioms...\n"; 35my $idioms = ''; 36my %idiomlist = (); 37my %idiomnames = (); 38open my $idiomfh, '<', idiompath; 39while(defined ($_=<$idiomfh>)) { 40 # OIL is whitespace-insensitive, except that comments go from 41 # semicolons to newlines. 42 s/;.*$//; 43 # Square brackets indicate idiom names. 44 s/\[(\w+)\]/($idiomnames{$1}=1),''/eg; 45 $idioms .= $_; 46} 47close $idiomfh; 48# Remove whitespace. 49$idioms =~ s/\s//g; 50# Eliminate C from the testbench, leaving only OIL. 51# Constants with a condition are changed to ,n rather than #n, to tell 52# them apart from constants with a stated value. 53$idioms =~ s/#\{[^{}]*\}/,/g; 54$idioms =~ s/([.:_])\{[^{}]*\}/$1/g; 55# Remove bitwidths. 56$idioms =~ s/([^#\d])(?:16|32)/$1/g; 57# Remove reference numbers. For idioms that need numbers to be equal, we 58# have to rely on the random chance of getting a duplication due to a 59# random duplication of operand or of variable. 60$idioms =~ s/([.:_,])\d+/$1/g; 61# Replace overlarge constants with mingles. 62$idioms =~ s/#(\d+)/$1 > 65535 ? mingle_constant($1) : "#$1"/eg; 63# Change _ ("allow any value") to . ("force 16-bit"); because the 64# optimiser is free to optimise errors into errors, but we want to 65# generate known non-erroring code. 66$idioms =~ s/_/./g; 67# To find idiom LHSes, we look for balanced paren groups on the LHS of a 68# ->. (This will miss idioms that use sparkears for the outermost group, 69# but nobody does that anyway, and it doesn't change the correctness of 70# this testbench, just makes it randomize in a slightly worse way.) 71# We ignore idioms containing the following characters: +-*/%<> because 72# they can't easily be translated into INTERCAL, and so there's no 73# direct way to cause the idiom to happen. 74$idioms =~ s/(\((?:[^-()+*\/\%<>]|(?1))+\))->/($idiomlist{$1}=1),"$1->"/eg; 75 76# We generate random INTERCAL expressions as follows: start with a 77# placeholder _1, then repeatedly replace the placeholder with a more 78# complicated expression, which is either a primitive equation (_1~_2, 79# _V1, etc) or the LHS of an idiom (in order to increase the chance of 80# finding idiom bugs). An OIL-style syntax is used internally in order 81# to make things easier, it's translated into INTERCAL at the end 82# (with any C operators replaced by INTERCAL definitions of them). The 83# placeholders are eventually changed to random constants (50% chance) 84# or to random variables in the set .123, except that every 85# expression contains at least one non-constant variable. Random 86# constants have a 50% chance of being completely random, and a 50% 87# chance of being a number somehow significant to INTERCAL idioms. 88# Instead of the OIL-like _1, we use _[1] in case we end up with more 89# than 9 placeholders. 90 91sub random_constant { 92 my $bitshift = int(rand(30)); 93 $bitshift == 23 and $bitshift = 1; # 2 and 3 are significant 94 $bitshift > 23 and $bitshift = 0; # but not as significant as 0 and 1 95 $bitshift > 16 and $bitshift = 16; # or as 65535 96 my $constant = 1 << $bitshift; 97 $constant > 65535 || (rand) > 0.5 and $constant--; 98 return '#'.$constant; 99} 100 101sub random_operand { 102 my $bitwidth = shift; 103 my $whichoperand = rand; 104 $whichoperand < 0.5 and return '.'.(int(rand(3))+1); 105 if ($bitwidth == 16) { 106 $whichoperand < 0.75 and return '#'.int(rand(65536)); 107 return random_constant; 108 } else { 109 $whichoperand < 0.75 and 110 return '(#'.int(rand(65536)).'$#'.int(rand(65536)).')'; 111 return '('.random_constant.'$'.random_constant.')'; 112 } 113} 114 115sub oil_expression { 116 my $expr = ':[1]'; 117 local $_; 118 my $idiomsused = 0; 119 while ((rand) < 0.9 || $expr eq ':[1]') { 120 my @placeholders = ($expr =~ /[.:]\[\d+\]/g); 121 last unless @placeholders; 122 my $unused_placeholder = 1; 123 /[.:]\[(\d+)\]/ and $1 >= $unused_placeholder 124 and $unused_placeholder = $1 + 1 for @placeholders; 125 my $replaced_placeholder = $placeholders[int(rand(@placeholders))]; 126 my $whichexpr = (rand); 127 my $newexpr = "*"; 128 if ($replaced_placeholder =~ /:/) {{ 129 $idiomsused > 0 and $whichexpr *= 0.4; 130 $whichexpr < 0.1 and $newexpr = '(:~:)', last; 131 $whichexpr < 0.2 and $newexpr = '(.$.)', last; 132 $whichexpr < 0.25 and $newexpr = '?:', last; 133 $whichexpr < 0.3 and $newexpr = 'V:', last; 134 $whichexpr < 0.35 and $newexpr = '&:', last; 135 if ($whichexpr < 0.4) { 136 $newexpr = $placeholders[int(rand(@placeholders))] 137 while $newexpr !~ /^\:/; 138 last; 139 } 140 # Grab an idiom from the list. 141 $newexpr = (keys %idiomlist)[int(rand(keys %idiomlist))]; 142 $idiomsused++; 143 }} else {{ 144 $whichexpr < 0.2 and $newexpr = '(.~.)', last; 145 $whichexpr < 0.6 and $newexpr = '(:~.)', last; 146 $whichexpr < 0.7 and $newexpr = '?.', last; 147 $whichexpr < 0.8 and $newexpr = 'V.', last; 148 $whichexpr < 0.9 and $newexpr = '&.', last; 149 $newexpr = $placeholders[int(rand(@placeholders))] 150 while $newexpr !~ /^\./; 151 }} 152 $newexpr =~ /[.,:]\[\d+\]/ or 153 $newexpr =~ s/([.,:])/$1."[".($unused_placeholder++)."]"/eg; 154 # Use {} as maybe-brackets; we don't know if $newexpr needs parens yet. 155 $expr =~ s/\Q$replaced_placeholder\E/{$newexpr}/g; 156 $expr =~ s/([?&V])\{([?&V][^{}]+)\}/$1($2)/g; 157 $expr =~ s/[{}]//g; 158 } 159 my $newop; 160 # Replace random operands 161 $newop = random_operand(16), $expr =~ s/\Q$_\E/$newop/g 162 for ($expr =~ /.\[\d+\]/g); 163 $newop = random_operand(32), $expr =~ s/\Q$_\E/$newop/g 164 for ($expr =~ /:\[\d+\]/g); 165 # and random constants 166 $newop = random_constant, $expr =~ s/\Q$_\E/$newop/g 167 for ($expr =~ /,\[\d+\]/g); 168 $expr =~ /[.:]/ or goto &oil_expression; 169 return $expr; 170} 171 172# A utility function for creating 32-bit C logical operations, to help 173# create code needed to trigger various idioms. 174sub clogop { 175 my $op = shift; 176 my $a = shift; 177 my $b = shift; 178 return "($op(($a~(#65535\$#0))\$($b~(#65535\$#0)))~(#0\$#65535))\$". 179 "($op(($a~(#0\$#65535))\$($b~(#0\$#65535)))~(#0\$#65535))"; 180} 181 182sub intercal_expression { 183 my $expr = oil_expression; 184 185 # C that came from idioms must be translated into INTERCAL. 186 my $intercal_operand = qr/[?\&V]?[#.:]\d+|[?\&V]?(\((?:[^()]|(?-1))+\))/; 187 { 188 $expr =~ s/(?'a'$intercal_operand)==(?'b'$intercal_operand)/!($+{a}^$+{b})/g 189 and redo; 190 $expr =~ s/(?'a'$intercal_operand)!=(?'b'$intercal_operand)/(($+{a}^$+{b})~($+{a}^$+{b}))~#1/g 191 and redo; 192 $expr =~ s/!(?'a'$intercal_operand)/?((($+{a}~$+{a})~#1)\$#1)~#1/g 193 and redo; 194 $expr =~ s/(?'a'$intercal_operand)\^(?'b'$intercal_operand)/clogop('?',$+{a},$+{b})/ge 195 and redo; 196 $expr =~ s/(?'a'$intercal_operand)\|(?'b'$intercal_operand)/clogop('V',$+{a},$+{b})/ge 197 and redo; 198 $expr =~ s/(?'a'$intercal_operand)\&(?'b'$intercal_operand)/clogop('&',$+{a},$+{b})/ge 199 and redo; 200 # C bitwise complement is tricky as there's a clash with INTERCAL 201 # binary select. So we check specifically for the ( before the ~. 202 # The other problem is guessing the bitwidth - 4294967295^x may 203 # overflow if x is actually 16-bit - so we instead just take a 204 # 16-bit complement. 205 $expr =~ s/\(~(?'a'$intercal_operand)/'('.clogop('?','#65535',$+{a})/ge and redo; 206 } 207 208 # Two changes are needed to allow for INTERCAL's insane syntax: 209 # move all unaries forwards one character, and change parens into 210 # spark/ears. In theory we could safely change everything to 211 # sparks as the location of operands disambiguates, but we'll be 212 # good and alternate in order to make life easier on a reader. 213 my $spark = 1; 214 $expr =~ s/([()])/($spark=!$spark),$spark ^ ($1 eq '(')?"'":'"'/ge; 215 $expr =~ s/([?&V]+)([#.'"])/$2$1/g; 216 return $expr; 217} 218 219# A utility function for creating 32-bit constants. 220 221sub mingle_constant { 222 my $c = shift; 223 my $a = 0; 224 my $b = 0; 225 for (0..15) { 226 $c & 1 and $b |= 65536; 227 $c & 2 and $a |= 65536; 228 $a >>= 1; 229 $b >>= 1; 230 $c >>= 2; 231 } 232 return "(#$a\$#$b)"; 233} 234 235# The main program: create a temporary directory, and an INTERCAL 236# program in it. We run fuzzamount expressions, each on fuzzamount 237# sets of variable values. 238 239print STDERR "Generating test data...\n"; 240my $dir = tempdir( CLEANUP => 1 ); 241my $fn = File::Spec->catfile($dir,"fuzz"); 242open my $fh, '>', "$fn.i"; 243my (@dot1, @dot2, @dot3); 244# Store the data in a massive stash. 245for (0..(fuzzamount-1)) { 246 print $fh "PLEASE .1 <- ", ($dot1[$_]=random_constant); 247 print $fh " DO .2 <- #", ($dot2[$_]=int(rand(65536))); 248 print $fh " DO .3 <- ", ($dot3[$_]=random_constant); 249 print $fh " DO STASH .1 + .2 + .3\n"; 250} 251# The computed ABSTAIN is used as a loop counter. 252print $fh "PLEASE ABSTAIN #".fuzzamount." FROM (3) (1) DO COME FROM (2)\n"; 253print $fh "DO RETRIEVE .1 + .2 + .3\n"; 254 255print STDERR "Generating test program...\n"; 256my $pleaseflop = 0; 257my @expressions = (); 258$|=1; 259for (0..(fuzzamount-1)) { 260 print STDERR "$_/",fuzzamount,"\r"; 261 $expressions[$_] = intercal_expression; 262 $pleaseflop && print $fh "PLEASE "; 263 $pleaseflop = !$pleaseflop; 264 print $fh "DO :1 <- ", $expressions[$_], " DO READ OUT :1\n"; 265} 266print $fh "(2) DO REINSTATE (3) (3) DO COME FROM (1) PLEASE GIVE UP\n"; 267close $fh; 268 269print STDERR "Running optimiser and compiling to C...\n"; 270my $ickpath = ickpath; 271my $optimiser_output = `$ickpath -bOch $fn.i 2>&1`; 272my %usedopts = (); 273$usedopts{$_}++ for $optimiser_output =~ m/\[(\w+)\]/g; 274# These prints are sent to stdout. 275print "Optimisations seen:\n"; 276(printf '%8d %s'."\n", $usedopts{$_}, $_), delete $idiomnames{scalar (/^([^_]+)/,$1)} 277 for sort {$usedopts{$b} <=> $usedopts{$a}} keys %usedopts; 278print " 0 $_\n" for sort keys %idiomnames; 279 280print STDERR "Compiling optimised program to executable...\n"; 281open my $prog, '<', "$fn.c"; 282my $line1 = <$prog>; 283close $prog; 284# We grab the compile-command that ick places into the generated 285# executable, and run it minus any -On option (because gcc is bad at 286# optimising large programs, and because we're not fuzzing /it/). 287my ($compilecommand) = $line1 =~ /compile-command:"([^"]+)"/; 288$compilecommand =~ s/-O\d//; 289system $compilecommand; 290 291print STDERR "Running optimised program...\n"; 292my $optimised_output = `$fn`; 293 294# -g suppresses optimisation 295print STDERR "Compiling program unoptimised...\n"; 296system "$ickpath -bg $fn.i"; 297 298print STDERR "Running unoptimised program...\n"; 299my $unoptimised_output = `$fn`; 300 301if ($optimised_output eq $unoptimised_output) { 302 print STDERR "No errors found!\n"; 303 exit(0); 304} 305 306print STDERR "Errors were found...\n"; 307# The remaining output is the error report, sent to stdout. 308print "Errors found:\n"; 309my @o1 = split $/, $unoptimised_output; 310my @o2 = split $/, $optimised_output; 311my %problemindices = (); 312my %problemlines = (); 313for my $index (0..$#o1) { 314 $o1[$index] eq $o2[$index] and next; 315 $problemindices{int($index/2)} = 1; 316 $problemlines{(int($index/2) % fuzzamount) + 1}++; 317} 318for my $index (sort {$a%fuzzamount <=> $b%fuzzamount} keys %problemindices) { 319 my $dataline = fuzzamount - 1 - int($index/fuzzamount); 320 my $ickline = ($index % fuzzamount) + 1; 321 $problemlines{$ickline} == -1 and next; 322 my $line2 = "Line $ickline (.1 = ". $dot1[$dataline]; 323 if ($problemlines{$ickline} > 500) { 324 $problemlines{$ickline} = -1; 325 $line2 = "Line $ickline fails on most input, e.g. (.1 = ". 326 $dot1[$dataline]; 327 } 328 if ($problemlines{$ickline} > 100) { 329 $problemlines{$ickline} = -1; 330 $line2 = "Line $ickline fails on many inputs, e.g. (.1 = ". 331 $dot1[$dataline]; 332 } 333 $line2 .= ", .2 = #". $dot2[$dataline]; 334 $line2 .= ", .3 = ". $dot3[$dataline] . "): u = "; 335 my $line1 = ' ' x length $line2; 336 $line1 .= $o1[$index*2]; 337 $line2 .= $o1[$index*2+1] . ", o = "; 338 $line1 .= ' ' x ((length $line2) - (length $line1)); 339 $line1 .= $o2[$index*2]; 340 $line2 .= $o2[$index*2+1]; 341 print "$line1\n$line2\n"; 342} 343for my $index (sort {$a <=> $b} keys %problemlines) { 344 print "Line $index is ".$expressions[$index-1]."\n"; 345} 346 347exit(1); 348