1#!/usr/bin/perl
2#
3# NAME
4#     interfuzz.pl -- C-INTERCAL optimiser fuzz-tester
5#
6# LICENSE TERMS
7#     Copyright (C) 2010 Alex Smith
8#
9#     This program is free software; you can redistribute it and/or modify
10#     it under the terms of the GNU General Public License as published by
11#     the Free Software Foundation; either version 2 of the License, or
12#     (at your option) any later version.
13#
14#     This program is distributed in the hope that it will be useful,
15#     but WITHOUT ANY WARRANTY; without even the implied warranty of
16#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17#     GNU General Public License for more details.
18#
19#     You should have received a copy of the GNU General Public License
20#     along with this program; if not, write to the Free Software
21#     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23
24use strict;
25use warnings;
26use File::Temp qw/tempdir/;
27use File::Spec;
28
29use constant fuzzamount => 1000;
30use constant ickpath => $ENV{ICK} || '../build/ick';
31use constant idiompath => $ENV{IDIOTISM} || '../src/idiotism.oil';
32
33# Before getting to the main program, we need to read in idiom LHSes.
34print STDERR "Loading idioms...\n";
35my $idioms = '';
36my %idiomlist = ();
37my %idiomnames = ();
38open my $idiomfh, '<', idiompath;
39while(defined ($_=<$idiomfh>)) {
40    # OIL is whitespace-insensitive, except that comments go from
41    # semicolons to newlines.
42    s/;.*$//;
43    # Square brackets indicate idiom names.
44    s/\[(\w+)\]/($idiomnames{$1}=1),''/eg;
45    $idioms .= $_;
46}
47close $idiomfh;
48# Remove whitespace.
49$idioms =~ s/\s//g;
50# Eliminate C from the testbench, leaving only OIL.
51# Constants with a condition are changed to ,n rather than #n, to tell
52# them apart from constants with a stated value.
53$idioms =~ s/#\{[^{}]*\}/,/g;
54$idioms =~ s/([.:_])\{[^{}]*\}/$1/g;
55# Remove bitwidths.
56$idioms =~ s/([^#\d])(?:16|32)/$1/g;
57# Remove reference numbers. For idioms that need numbers to be equal, we
58# have to rely on the random chance of getting a duplication due to a
59# random duplication of operand or of variable.
60$idioms =~ s/([.:_,])\d+/$1/g;
61# Replace overlarge constants with mingles.
62$idioms =~ s/#(\d+)/$1 > 65535 ? mingle_constant($1) : "#$1"/eg;
63# Change _ ("allow any value") to . ("force 16-bit"); because the
64# optimiser is free to optimise errors into errors, but we want to
65# generate known non-erroring code.
66$idioms =~ s/_/./g;
67# To find idiom LHSes, we look for balanced paren groups on the LHS of a
68# ->. (This will miss idioms that use sparkears for the outermost group,
69# but nobody does that anyway, and it doesn't change the correctness of
70# this testbench, just makes it randomize in a slightly worse way.)
71# We ignore idioms containing the following characters: +-*/%<> because
72# they can't easily be translated into INTERCAL, and so there's no
73# direct way to cause the idiom to happen.
74$idioms =~ s/(\((?:[^-()+*\/\%<>]|(?1))+\))->/($idiomlist{$1}=1),"$1->"/eg;
75
76# We generate random INTERCAL expressions as follows: start with a
77# placeholder _1, then repeatedly replace the placeholder with a more
78# complicated expression, which is either a primitive equation (_1~_2,
79# _V1, etc) or the LHS of an idiom (in order to increase the chance of
80# finding idiom bugs). An OIL-style syntax is used internally in order
81# to make things easier, it's translated into INTERCAL at the end
82# (with any C operators replaced by INTERCAL definitions of them). The
83# placeholders are eventually changed to random constants (50% chance)
84# or to random variables in the set .123, except that every
85# expression contains at least one non-constant variable. Random
86# constants have a 50% chance of being completely random, and a 50%
87# chance of being a number somehow significant to INTERCAL idioms.
88# Instead of the OIL-like _1, we use _[1] in case we end up with more
89# than 9 placeholders.
90
91sub random_constant {
92    my $bitshift = int(rand(30));
93    $bitshift == 23 and $bitshift = 1; # 2 and 3 are significant
94    $bitshift > 23 and $bitshift = 0;  # but not as significant as 0 and 1
95    $bitshift > 16 and $bitshift = 16; # or as 65535
96    my $constant = 1 << $bitshift;
97    $constant > 65535 || (rand) > 0.5 and $constant--;
98    return '#'.$constant;
99}
100
101sub random_operand {
102    my $bitwidth = shift;
103    my $whichoperand = rand;
104    $whichoperand < 0.5 and return '.'.(int(rand(3))+1);
105    if ($bitwidth == 16) {
106        $whichoperand < 0.75 and return '#'.int(rand(65536));
107        return random_constant;
108    } else {
109        $whichoperand < 0.75 and
110            return '(#'.int(rand(65536)).'$#'.int(rand(65536)).')';
111        return '('.random_constant.'$'.random_constant.')';
112    }
113}
114
115sub oil_expression {
116    my $expr = ':[1]';
117    local $_;
118    my $idiomsused = 0;
119    while ((rand) < 0.9 || $expr eq ':[1]') {
120        my @placeholders = ($expr =~ /[.:]\[\d+\]/g);
121        last unless @placeholders;
122        my $unused_placeholder = 1;
123        /[.:]\[(\d+)\]/ and $1 >= $unused_placeholder
124            and $unused_placeholder = $1 + 1 for @placeholders;
125        my $replaced_placeholder = $placeholders[int(rand(@placeholders))];
126        my $whichexpr = (rand);
127        my $newexpr = "*";
128        if ($replaced_placeholder =~ /:/) {{
129            $idiomsused > 0 and $whichexpr *= 0.4;
130            $whichexpr < 0.1  and $newexpr = '(:~:)', last;
131            $whichexpr < 0.2  and $newexpr = '(.$.)', last;
132            $whichexpr < 0.25 and $newexpr = '?:', last;
133            $whichexpr < 0.3  and $newexpr = 'V:', last;
134            $whichexpr < 0.35 and $newexpr = '&:', last;
135            if ($whichexpr < 0.4) {
136                $newexpr = $placeholders[int(rand(@placeholders))]
137                    while $newexpr !~ /^\:/;
138                last;
139            }
140            # Grab an idiom from the list.
141            $newexpr = (keys %idiomlist)[int(rand(keys %idiomlist))];
142            $idiomsused++;
143        }} else {{
144            $whichexpr < 0.2 and $newexpr = '(.~.)', last;
145            $whichexpr < 0.6 and $newexpr = '(:~.)', last;
146            $whichexpr < 0.7 and $newexpr = '?.', last;
147            $whichexpr < 0.8 and $newexpr = 'V.', last;
148            $whichexpr < 0.9 and $newexpr = '&.', last;
149            $newexpr = $placeholders[int(rand(@placeholders))]
150                while $newexpr !~ /^\./;
151        }}
152        $newexpr =~ /[.,:]\[\d+\]/ or
153            $newexpr =~ s/([.,:])/$1."[".($unused_placeholder++)."]"/eg;
154        # Use {} as maybe-brackets; we don't know if $newexpr needs parens yet.
155        $expr =~ s/\Q$replaced_placeholder\E/{$newexpr}/g;
156        $expr =~ s/([?&V])\{([?&V][^{}]+)\}/$1($2)/g;
157        $expr =~ s/[{}]//g;
158    }
159    my $newop;
160    # Replace random operands
161    $newop = random_operand(16), $expr =~ s/\Q$_\E/$newop/g
162        for ($expr =~ /.\[\d+\]/g);
163    $newop = random_operand(32), $expr =~ s/\Q$_\E/$newop/g
164        for ($expr =~ /:\[\d+\]/g);
165    # and random constants
166    $newop = random_constant, $expr =~ s/\Q$_\E/$newop/g
167        for ($expr =~ /,\[\d+\]/g);
168    $expr =~ /[.:]/ or goto &oil_expression;
169    return $expr;
170}
171
172# A utility function for creating 32-bit C logical operations, to help
173# create code needed to trigger various idioms.
174sub clogop {
175    my $op = shift;
176    my $a = shift;
177    my $b = shift;
178    return "($op(($a~(#65535\$#0))\$($b~(#65535\$#0)))~(#0\$#65535))\$".
179           "($op(($a~(#0\$#65535))\$($b~(#0\$#65535)))~(#0\$#65535))";
180}
181
182sub intercal_expression {
183    my $expr = oil_expression;
184
185    # C that came from idioms must be translated into INTERCAL.
186    my $intercal_operand = qr/[?\&V]?[#.:]\d+|[?\&V]?(\((?:[^()]|(?-1))+\))/;
187    {
188        $expr =~ s/(?'a'$intercal_operand)==(?'b'$intercal_operand)/!($+{a}^$+{b})/g
189            and redo;
190        $expr =~ s/(?'a'$intercal_operand)!=(?'b'$intercal_operand)/(($+{a}^$+{b})~($+{a}^$+{b}))~#1/g
191            and redo;
192        $expr =~ s/!(?'a'$intercal_operand)/?((($+{a}~$+{a})~#1)\$#1)~#1/g
193            and redo;
194        $expr =~ s/(?'a'$intercal_operand)\^(?'b'$intercal_operand)/clogop('?',$+{a},$+{b})/ge
195            and redo;
196        $expr =~ s/(?'a'$intercal_operand)\|(?'b'$intercal_operand)/clogop('V',$+{a},$+{b})/ge
197            and redo;
198        $expr =~ s/(?'a'$intercal_operand)\&(?'b'$intercal_operand)/clogop('&',$+{a},$+{b})/ge
199            and redo;
200        # C bitwise complement is tricky as there's a clash with INTERCAL
201        # binary select. So we check specifically for the ( before the ~.
202        # The other problem is guessing the bitwidth - 4294967295^x may
203        # overflow if x is actually 16-bit - so we instead just take a
204        # 16-bit complement.
205        $expr =~ s/\(~(?'a'$intercal_operand)/'('.clogop('?','#65535',$+{a})/ge and redo;
206    }
207
208    # Two changes are needed to allow for INTERCAL's insane syntax:
209    # move all unaries forwards one character, and change parens into
210    # spark/ears. In theory we could safely change everything to
211    # sparks as the location of operands disambiguates, but we'll be
212    # good and alternate in order to make life easier on a reader.
213    my $spark = 1;
214    $expr =~ s/([()])/($spark=!$spark),$spark ^ ($1 eq '(')?"'":'"'/ge;
215    $expr =~ s/([?&V]+)([#.'"])/$2$1/g;
216    return $expr;
217}
218
219# A utility function for creating 32-bit constants.
220
221sub mingle_constant {
222    my $c = shift;
223    my $a = 0;
224    my $b = 0;
225    for (0..15) {
226        $c & 1 and $b |= 65536;
227        $c & 2 and $a |= 65536;
228        $a >>= 1;
229        $b >>= 1;
230        $c >>= 2;
231    }
232    return "(#$a\$#$b)";
233}
234
235# The main program: create a temporary directory, and an INTERCAL
236# program in it. We run fuzzamount expressions, each on fuzzamount
237# sets of variable values.
238
239print STDERR "Generating test data...\n";
240my $dir = tempdir( CLEANUP => 1 );
241my $fn = File::Spec->catfile($dir,"fuzz");
242open my $fh, '>', "$fn.i";
243my (@dot1, @dot2, @dot3);
244# Store the data in a massive stash.
245for (0..(fuzzamount-1)) {
246    print $fh "PLEASE .1 <- ", ($dot1[$_]=random_constant);
247    print $fh " DO .2 <- #", ($dot2[$_]=int(rand(65536)));
248    print $fh " DO .3 <- ", ($dot3[$_]=random_constant);
249    print $fh " DO STASH .1 + .2 + .3\n";
250}
251# The computed ABSTAIN is used as a loop counter.
252print $fh "PLEASE ABSTAIN #".fuzzamount." FROM (3) (1) DO COME FROM (2)\n";
253print $fh "DO RETRIEVE .1 + .2 + .3\n";
254
255print STDERR "Generating test program...\n";
256my $pleaseflop = 0;
257my @expressions = ();
258$|=1;
259for (0..(fuzzamount-1)) {
260    print STDERR "$_/",fuzzamount,"\r";
261    $expressions[$_] = intercal_expression;
262    $pleaseflop && print $fh "PLEASE ";
263    $pleaseflop = !$pleaseflop;
264    print $fh "DO :1 <- ", $expressions[$_], " DO READ OUT :1\n";
265}
266print $fh "(2) DO REINSTATE (3) (3) DO COME FROM (1) PLEASE GIVE UP\n";
267close $fh;
268
269print STDERR "Running optimiser and compiling to C...\n";
270my $ickpath = ickpath;
271my $optimiser_output = `$ickpath -bOch $fn.i 2>&1`;
272my %usedopts = ();
273$usedopts{$_}++ for $optimiser_output =~ m/\[(\w+)\]/g;
274# These prints are sent to stdout.
275print "Optimisations seen:\n";
276(printf '%8d %s'."\n", $usedopts{$_}, $_), delete $idiomnames{scalar (/^([^_]+)/,$1)}
277    for sort {$usedopts{$b} <=> $usedopts{$a}} keys %usedopts;
278print "       0 $_\n" for sort keys %idiomnames;
279
280print STDERR "Compiling optimised program to executable...\n";
281open my $prog, '<', "$fn.c";
282my $line1 = <$prog>;
283close $prog;
284# We grab the compile-command that ick places into the generated
285# executable, and run it minus any -On option (because gcc is bad at
286# optimising large programs, and because we're not fuzzing /it/).
287my ($compilecommand) = $line1 =~ /compile-command:"([^"]+)"/;
288$compilecommand =~ s/-O\d//;
289system $compilecommand;
290
291print STDERR "Running optimised program...\n";
292my $optimised_output = `$fn`;
293
294# -g suppresses optimisation
295print STDERR "Compiling program unoptimised...\n";
296system "$ickpath -bg $fn.i";
297
298print STDERR "Running unoptimised program...\n";
299my $unoptimised_output = `$fn`;
300
301if ($optimised_output eq $unoptimised_output) {
302    print STDERR "No errors found!\n";
303    exit(0);
304}
305
306print STDERR "Errors were found...\n";
307# The remaining output is the error report, sent to stdout.
308print "Errors found:\n";
309my @o1 = split $/, $unoptimised_output;
310my @o2 = split $/, $optimised_output;
311my %problemindices = ();
312my %problemlines = ();
313for my $index (0..$#o1) {
314    $o1[$index] eq $o2[$index] and next;
315    $problemindices{int($index/2)} = 1;
316    $problemlines{(int($index/2) % fuzzamount) + 1}++;
317}
318for my $index (sort {$a%fuzzamount <=> $b%fuzzamount} keys %problemindices) {
319    my $dataline = fuzzamount - 1 - int($index/fuzzamount);
320    my $ickline = ($index % fuzzamount) + 1;
321    $problemlines{$ickline} == -1 and next;
322    my $line2 = "Line $ickline (.1 = ". $dot1[$dataline];
323    if ($problemlines{$ickline} > 500) {
324        $problemlines{$ickline} = -1;
325        $line2 = "Line $ickline fails on most input, e.g. (.1 = ".
326            $dot1[$dataline];
327    }
328    if ($problemlines{$ickline} > 100) {
329        $problemlines{$ickline} = -1;
330        $line2 = "Line $ickline fails on many inputs, e.g. (.1 = ".
331            $dot1[$dataline];
332    }
333    $line2 .= ", .2 = #". $dot2[$dataline];
334    $line2 .= ", .3 = ". $dot3[$dataline] . "): u = ";
335    my $line1 = ' ' x length $line2;
336    $line1 .= $o1[$index*2];
337    $line2 .= $o1[$index*2+1] . ", o = ";
338    $line1 .= ' ' x ((length $line2) - (length $line1));
339    $line1 .= $o2[$index*2];
340    $line2 .= $o2[$index*2+1];
341    print "$line1\n$line2\n";
342}
343for my $index (sort {$a <=> $b} keys %problemlines) {
344    print "Line $index is ".$expressions[$index-1]."\n";
345}
346
347exit(1);
348