1#!/usr/bin/env perl
2
3#
4#//===----------------------------------------------------------------------===//
5#//
6#// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
7#// See https://llvm.org/LICENSE.txt for license information.
8#// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
9#//
10#//===----------------------------------------------------------------------===//
11#
12
13use strict;
14use warnings;
15
16use FindBin;
17use lib "$FindBin::Bin/lib";
18
19use tools;
20
21our $VERSION = "0.004";
22my $target_os;
23my $target_arch;
24my $target_mic_arch;
25
26my $hex = qr{[0-9a-f]}i;    # hex digit.
27
28# mic-specific details.
29
30sub bad_mic_fmt($) {
31    # Before we allowed both elf64-x86-64-freebsd and elf-l1om-freebsd.
32    # Now the first one is obsolete, only elf64-l1om-freebsd is allowed.
33    my ( $fmt ) = @_;
34    if ( 0 ) {
35    } elsif ( "$target_mic_arch" eq "knf" ) {
36	    return $fmt !~ m{\Aelf64-l1om?\z};
37    } elsif ( "$target_mic_arch" eq "knc" ) {
38	    return $fmt !~ m{\Aelf64-k1om?\z};
39	} else {
40	    return 1;
41	};
42}; # sub bad_mic_fmt
43
44# Undesired instructions for mic: all x87 and some other.
45# AC: Since compiler 2010-06-30 x87 instructions are supported, removed the check of x87.
46my $mic_bad_re;
47sub bad_mic_instr($$) {
48    my ( $instr, $args ) = @_;
49    if ( "$target_mic_arch" eq "knc" ) {
50	# workaround of bad code generation on KNF Linux* OS:
51	return ( defined( $instr ) and $instr =~ $mic_bad_re );
52    } else {
53	return ( defined( $instr ) and $instr =~ $mic_bad_re or defined( $args ) and $args =~ m{xmm}i );
54    }
55}; # sub bad_mic_instr
56
57# lin_32-specific details.
58
59sub bad_ia32_fmt($) {
60    my ( $fmt ) = @_;
61    return $fmt !~ m{\Aelf32-i386\z};
62}; # sub bad_ia32_fmt
63
64my @sse2 =
65    qw{
66        movapd movupd movhpd movlpd movmskpd movsd
67        addpd addsd subpd subsd mulpd mulsd divpd divsd sqrtpd sqrtsd maxpd maxsd minpd minsd
68        andpd andnpd orpd xorpd
69        cmppd cmpsd comisd ucomisd
70        shufpd unpckhpd unpcklpd
71        cvtpd2pi cvttpd2pi cvtpi2pd cvtpd2dq cvttpd2dq cvtdq2pd cvtps2pd cvtpd2ps cvtss2sd cvtsd2ss
72        cvtsd2si cvttsd2si cvtsi2sd cvtdq2ps cvtps2dq cvttps2dq movdqa movdqu movq2dq movdq2q
73        pmuludq paddq psubq pshuflw pshufhw pshufd pslldq psrldq punpckhqdq punpcklqdq clflush
74        lfence mfence maskmovdqu movntpd movntdq movnti
75    };
76my @sse3 =
77    qw{
78        fisttp lddqu addsubps addsubpd haddps hsubps haddpd hsubpd movshdup movsldup movddup monitor
79        mwait
80    };
81my @ssse3 =
82    qw{
83        phaddw phaddsw phaddd phsubw phsubsw phsubd pabsb pabsw pabsd pmaddubsw pmulhrsw pshufb
84        psignb psignw psignd palignr
85    };
86my @sse4 =
87    (
88        # SSE4.1
89        qw{
90            pmulld pmuldq dppd dpps movntdqa blendpd blendps blendvpd blendvps pblendvb pblendw pminuw
91            pminud pminsb pminsd pmaxuw pmaxud pmaxsb pmaxsd roundps roundpd roundss roundsd extractps
92            insertps pinsrb pinsrd pinsrq pextrb pextrw pextrd pextrq pmovsxbw pmovzxbw pmovsxbd
93            pmovzxbd pmovsxwd pmovzxwd pmovsxbq pmovzxbq pmovsxwq pmovzxwq pmovsxdq pmovzxdq mpsadbw
94            phminposuw ptest pcmpeqq packusdw
95        },
96        # SSE4.2
97        qw{
98            pcmpestri pcmpestrm pcmpistri pcmpistrm pcmpgtq crc32 popcnt
99        }
100    );
101
102# Undesired instructions for IA-32 architecture: Pentium 4 (SSE2) and newer.
103# TODO: It would be much more reliable to list *allowed* instructions rather than list undesired
104# instructions. In such a case the list will be stable and not require update when SSE5 is released.
105my @ia32_bad_list = ( @sse2, @sse3, @ssse3, @sse4 );
106
107my $ia32_bad_re = qr{@{[ "^(?:" . join( "|", @ia32_bad_list ) . ")" ]}}i;
108
109sub bad_ia32_instr($$) {
110    my ( $instr, $args ) = @_;
111    return ( defined( $instr ) and $instr =~ $ia32_bad_re );
112}; # sub bad_ia32_instr
113
114sub check_file($;$$) {
115
116    my ( $file, $show_instructions, $max_instructions ) = @_;
117    my @bulk;
118
119    if ( not defined( $max_instructions ) ) {
120        $max_instructions = 100;
121    }; # if
122
123    execute( [ "x86_64-k1om-linux-objdump", "-d", $file ], -stdout => \@bulk );
124
125    my $n = 0;
126    my $errors = 0;
127    my $current_func  = "";    # Name of current function.
128    my $reported_func = "";    # name of last reported function.
129    foreach my $line ( @bulk ) {
130        ++ $n;
131        if ( 0 ) {
132        } elsif ( $line =~ m{^\s*$} ) {
133            # Empty line.
134            # Ignore.
135        } elsif ( $line =~ m{^In archive (.*?):\s*$} ) {
136            # In archive libomp.a:
137        } elsif ( $line =~ m{^(?:.*?):\s*file format (.*?)\s*$} ) {
138            # libomp.so:     file format elf64-x86-64-freebsd
139            # kmp_ftn_cdecl.o:     file format elf64-x86-64
140            my $fmt = $1;
141            if ( bad_fmt( $fmt ) ) {
142                runtime_error( "Invalid file format: $fmt." );
143            }; # if
144        } elsif ( $line =~ m{^Disassembly of section (.*?):\s*$} ) {
145            # Disassembly of section .plt:
146        } elsif ( $line =~ m{^$hex+ <([^>]+)>:\s*$} ) {
147            # 0000000000017e98 <__kmp_str_format@plt-0x10>:
148            $current_func = $1;
149        } elsif ( $line =~ m{^\s*\.{3}\s*$} ) {
150        } elsif ( $line =~ m{^\s*($hex+):\s+($hex$hex(?: $hex$hex)*)\s+(?:lock\s+|rex[.a-z]*\s+)?([^ ]+)(?:\s+([^#]+?))?\s*(?:#|$)} ) {
151            #   17e98:       ff 35 fa 7d 26 00       pushq  0x267dfa(%rip)        # 27fc98 <_GLOBAL_OFFSET_TABLE>
152            my ( $addr, $dump, $instr, $args ) = ( $1, $2, $3, $4 );
153            # Check this is not a bad instruction and xmm registers are not used.
154            if ( bad_instr( $instr, $args ) ) {
155                if ( $errors == 0 ) {
156                    warning( "Invalid instructions found in `$file':" );
157                }; # if
158                if ( $current_func ne $reported_func ) {
159                    warning( "    $current_func" );
160                    $reported_func = $current_func;
161                }; # if
162                ++ $errors;
163                if ( $show_instructions ) {
164                    warning( "        $line" );
165                }; # if
166                if ( $errors >= $max_instructions ) {
167                    info( "$errors invalid instructions found; scanning stopped." );
168                    last;
169                }; # if
170            }; # if
171        } else {
172            runtime_error( "Error parsing objdump output line $n:\n>>>> $line\n" );
173        }; # if
174    }; # foreach $line
175
176    return $errors;
177
178}; # sub check_file
179
180# --------------------------------------------------------------------------------------------------
181
182# Parse command line.
183my $max_instructions;
184my $show_instructions;
185get_options(
186    "os=s"               => \$target_os,
187    "arch=s"             => \$target_arch,
188    "mic-arch=s"         => \$target_mic_arch,
189    "max-instructions=i" => \$max_instructions,
190    "show-instructions!" => \$show_instructions,
191);
192my $target_platform = $target_os . "_" . $target_arch;
193if ( "$target_os" eq "lin" and "$target_mic_arch" eq "knf" ) {
194    $mic_bad_re = qr{^(?:pause|[slm]fence|scatter|gather|cmpxchg16b|clevict[12])}i;
195} else {
196    $mic_bad_re = qr{^(?:pause|[slm]fence|scatter|gather|cmov|cmpxchg16b|clevict[12])}i;
197};
198if ( 0 ) {
199} elsif ( $target_platform eq "lin_mic" ) {
200    *bad_instr = \*bad_mic_instr;
201    *bad_fmt   = \*bad_mic_fmt;
202} elsif ( $target_platform eq "lin_32" ) {
203    *bad_instr = \*bad_ia32_instr;
204    *bad_fmt   = \*bad_ia32_fmt;
205} else {
206    runtime_error( "Only works on lin_32 and lin_mic platforms." );
207}; # if
208
209# Do the work.
210my $rc = 0;
211if ( not @ARGV ) {
212    info( "No arguments specified -- nothing to do." );
213} else {
214    foreach my $arg ( @ARGV ) {
215        my $errs = check_file( $arg, $show_instructions, $max_instructions );
216        if ( $errs > 0 ) {
217            $rc = 3;
218        }; # if
219    }; # foreach $arg
220}; # if
221
222exit( $rc );
223
224__END__
225
226=pod
227
228=head1 NAME
229
230B<check-instruction-set.pl> -- Make sure binary file does not contain undesired instructions.
231
232=head1 SYNOPSIS
233
234B<check-instructions.pl> I<option>... I<file>...
235
236=head1 OPTIONS
237
238=over
239
240=item B<--architecture=>I<arch>
241
242Specify target architecture.
243
244=item B<--max-instructions=>I<number>
245
246Stop scanning if I<number> invalid instructions found. 100 by default.
247
248=item B<--os=>I<os>
249
250Specify target OS.
251
252=item B<-->[B<no->]B<show-instructions>
253
254Show invalid instructions found in the file. Bu default, instructions are not shown.
255
256=item Standard Options
257
258=over
259
260=item B<--doc>
261
262=item B<--manual>
263
264Print full help message and exit.
265
266=item B<--help>
267
268Print short help message and exit.
269
270=item B<--usage>
271
272Print very short usage message and exit.
273
274=item B<--verbose>
275
276Do print informational messages.
277
278=item B<--version>
279
280Print program version and exit.
281
282=item B<--quiet>
283
284Work quiet, do not print informational messages.
285
286=back
287
288=back
289
290=head1 ARGUMENTS
291
292=over
293
294=item I<file>
295
296File (object file or library, either static or dynamic) to check.
297
298=back
299
300=head1 DESCRIPTION
301
302The script runs F<objdump> utility to get disassembler listing and checks the file does not contain
303unwanted instructions.
304
305Currently the script works only for:
306
307=over
308
309=item C<lin_mic>
310
311Intel(R) Many Integrated Core Architecture target OS. Undesired unstructions are: all x87 instructions and some others.
312
313=item C<lin_32>
314
315Undesired instructions are instructions not valid for Pentium 3 processor (SSE2 and newer).
316
317=back
318
319=cut
320
321