1 /* tc-i386.c -- Assemble code for the Intel 80386
2    Copyright (C) 1989-2020 Free Software Foundation, Inc.
3 
4    This file is part of GAS, the GNU Assembler.
5 
6    GAS is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    GAS is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GAS; see the file COPYING.  If not, write to the Free
18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19    02110-1301, USA.  */
20 
21 /* Intel 80386 machine specific gas.
22    Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
23    x86_64 support by Jan Hubicka (jh@suse.cz)
24    VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
25    Bugs & suggestions are completely welcome.  This is free software.
26    Please help us make it better.  */
27 
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "elf/x86-64.h"
34 #include "opcodes/i386-init.h"
35 
/* Make INT_MAX available.  <limits.h> is preferred; without it we try
   <sys/param.h> and, if INT_MAX is still undefined afterwards, fall back
   to a local definition.  */
#ifdef HAVE_LIMITS_H
#include <limits.h>
#else
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifndef INT_MAX
/* An all-ones unsigned value shifted right by one bit, converted to int.
   The whole replacement list is parenthesized so that the macro behaves
   as a single primary expression wherever it is used (for instance as
   the operand of sizeof); a bare leading cast would bind to whatever
   follows the macro instead.  */
#define INT_MAX ((int) (((unsigned) (-1)) >> 1))
#endif
#endif
46 
/* Build-time defaults.  Each macro below is only given a value here when
   nothing earlier has defined it already.  */

#if !defined (REGISTER_WARNINGS)
# define REGISTER_WARNINGS 1
#endif

#if !defined (INFER_ADDR_PREFIX)
# define INFER_ADDR_PREFIX 1
#endif

/* Architecture name used to initialise default_arch.  */
#if !defined (DEFAULT_ARCH)
# define DEFAULT_ARCH "i386"
#endif

/* INLINE expands to __inline__ for GCC 2 and later, and to nothing for
   any other compiler.  */
#if !defined (INLINE)
# if defined (__GNUC__) && __GNUC__ >= 2
#  define INLINE __inline__
# else
#  define INLINE
# endif
#endif
66 
/* Prefix slots.  Prefixes are emitted in the order of the slot numbers
   defined here.  WAIT_PREFIX has to be the first one: FWAIT is really an
   instruction in its own right, so it must come before any prefix.
   The preferred order for the others is SEG_PREFIX, ADDR_PREFIX,
   DATA_PREFIX, REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
#define WAIT_PREFIX	0
#define SEG_PREFIX	1
#define ADDR_PREFIX	2
#define DATA_PREFIX	3
#define REP_PREFIX	4
/* HLE and BND prefixes occupy the same slot as REP.  */
#define HLE_PREFIX	REP_PREFIX
#define BND_PREFIX	REP_PREFIX
#define LOCK_PREFIX	5
/* REX must come last.  */
#define REX_PREFIX	6
/* Maximum number of prefixes per opcode.  */
#define MAX_PREFIXES	7
82 
/* Operand syntax markers (the base,index,scale syntax is handled
   separately).  */
#define REGISTER_PREFIX		'%'
#define IMMEDIATE_PREFIX	'$'
#define ABSOLUTE_PREFIX		'*'

/* Instruction mnemonic suffixes in AT&T syntax; in Intel syntax these
   denote the memory operand size.  */
#define BYTE_MNEM_SUFFIX	'b'
#define WORD_MNEM_SUFFIX	'w'
#define SHORT_MNEM_SUFFIX	's'
#define LONG_MNEM_SUFFIX	'l'
#define QWORD_MNEM_SUFFIX	'q'
/* Intel syntax only.  A control character is used because it never
   appears in instructions.  */
#define LONG_DOUBLE_MNEM_SUFFIX	'\1'

/* Character that terminates an instruction string.  */
#define END_OF_INSN		'\0'

/* Matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding
103 
104 /*
105   'templates' is for grouping together 'template' structures for opcodes
106   of the same name.  This is only used for storing the insns in the grand
107   ole hash table of insns.
108   The templates themselves start at START and range up to (but not including)
109   END.
110   */
111 typedef struct
112 {
113   const insn_template *start;
114   const insn_template *end;
115 }
116 templates;
117 
/* Fields of the 386 operand encoding (ModRM) byte; see the 386 book for
   the details.  */
typedef struct
{
  /* Codes the register or memory operand.  */
  unsigned int regmem;
  /* Codes the register operand (or an extended opcode).  */
  unsigned int reg;
  /* Says how REGMEM and REG are to be interpreted.  */
  unsigned int mode;
} modrm_byte;

/* The x86-64 extension (REX) prefix.  */
typedef int rex_byte;

/* Fields of the 386 opcode byte that codes indirect addressing (SIB).  */
typedef struct
{
  unsigned int base;
  unsigned int index;
  unsigned int scale;
} sib_byte;
138 
139 /* x86 arch names, types and features */
140 typedef struct
141 {
142   const char *name;		/* arch name */
143   unsigned int len;		/* arch string length */
144   enum processor_type type;	/* arch type */
145   i386_cpu_flags flags;		/* cpu feature flags */
146   unsigned int skip;		/* show_arch should skip this. */
147 }
148 arch_entry;
149 
150 /* Used to turn off indicated flags.  */
151 typedef struct
152 {
153   const char *name;		/* arch name */
154   unsigned int len;		/* arch string length */
155   i386_cpu_flags flags;		/* cpu feature flags */
156 }
157 noarch_entry;
158 
159 static void update_code_flag (int, int);
160 static void set_code_flag (int);
161 static void set_16bit_gcc_code_flag (int);
162 static void set_intel_syntax (int);
163 static void set_intel_mnemonic (int);
164 static void set_allow_index_reg (int);
165 static void set_check (int);
166 static void set_cpu_arch (int);
167 #ifdef TE_PE
168 static void pe_directive_secrel (int);
169 #endif
170 static void signed_cons (int);
171 static char *output_invalid (int c);
172 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
173 				    const char *);
174 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
175 				       const char *);
176 static int i386_att_operand (char *);
177 static int i386_intel_operand (char *, int);
178 static int i386_intel_simplify (expressionS *);
179 static int i386_intel_parse_name (const char *, expressionS *);
180 static const reg_entry *parse_register (char *, char **);
181 static char *parse_insn (char *, char *);
182 static char *parse_operands (char *, const char *);
183 static void swap_operands (void);
184 static void swap_2_operands (int, int);
185 static enum flag_code i386_addressing_mode (void);
186 static void optimize_imm (void);
187 static void optimize_disp (void);
188 static const insn_template *match_template (char);
189 static int check_string (void);
190 static int process_suffix (void);
191 static int check_byte_reg (void);
192 static int check_long_reg (void);
193 static int check_qword_reg (void);
194 static int check_word_reg (void);
195 static int finalize_imm (void);
196 static int process_operands (void);
197 static const seg_entry *build_modrm_byte (void);
198 static void output_insn (void);
199 static void output_imm (fragS *, offsetT);
200 static void output_disp (fragS *, offsetT);
201 #ifndef I386COFF
202 static void s_bss (int);
203 #endif
204 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
205 static void handle_large_common (int small ATTRIBUTE_UNUSED);
206 
207 /* GNU_PROPERTY_X86_ISA_1_USED.  */
208 static unsigned int x86_isa_1_used;
209 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
210 static unsigned int x86_feature_2_used;
211 /* Generate x86 used ISA and feature properties.  */
212 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
213 #endif
214 
215 static const char *default_arch = DEFAULT_ARCH;
216 
/* Rounding control and SAE as specified in the instruction.  */
struct RC_Operation
{
  /* Kind of rounding control, or SAE only.  */
  enum rc_type
  {
    rne = 0,
    rd,
    ru,
    rz,
    saeonly
  } type;

  /* Operand index.  */
  int operand;
};

static struct RC_Operation rc_op;
232 
233 /* The struct describes masking, applied to OPERAND in the instruction.
234    MASK is a pointer to the corresponding mask register.  ZEROING tells
235    whether merging or zeroing mask is used.  */
236 struct Mask_Operation
237 {
238   const reg_entry *mask;
239   unsigned int zeroing;
240   /* The operand where this operation is associated.  */
241   int operand;
242 };
243 
244 static struct Mask_Operation mask_op;
245 
/* Broadcasting applied to operand OPERAND.  */
struct Broadcast_Operation
{
  /* Broadcast factor, i.e. the kind of broadcast: {1to2}, {1to4},
     {1to8}, or {1to16}.  */
  int type;

  /* Index of the operand being broadcast.  */
  int operand;

  /* Number of bytes to broadcast.  */
  int bytes;
};

static struct Broadcast_Operation broadcast_op;
261 
262 /* VEX prefix.  */
263 typedef struct
264 {
265   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
266   unsigned char bytes[4];
267   unsigned int length;
268   /* Destination or source register specifier.  */
269   const reg_entry *register_specifier;
270 } vex_prefix;
271 
272 /* 'md_assemble ()' gathers together information and puts it into a
273    i386_insn.  */
274 
275 union i386_op
276   {
277     expressionS *disps;
278     expressionS *imms;
279     const reg_entry *regs;
280   };
281 
/* Error kinds that can be stored in the 'error' member of an
   i386_insn.  */
enum i386_error
{
  operand_size_mismatch = 0,
  operand_type_mismatch,
  register_type_mismatch,
  number_of_operands_mismatch,
  invalid_instruction_suffix,
  bad_imm4,
  unsupported_with_intel_mnemonic,
  unsupported_syntax,
  unsupported,
  invalid_vsib_address,
  invalid_vector_register_set,
  unsupported_vector_index_register,
  unsupported_broadcast,
  broadcast_needed,
  unsupported_masking,
  mask_not_on_destination,
  no_default_mask,
  unsupported_rc_sae,
  rc_sae_operand_not_last_imm,
  invalid_register_operand,
};
305 
306 struct _i386_insn
307   {
308     /* TM holds the template for the insn were currently assembling.  */
309     insn_template tm;
310 
311     /* SUFFIX holds the instruction size suffix for byte, word, dword
312        or qword, if given.  */
313     char suffix;
314 
315     /* OPERANDS gives the number of given operands.  */
316     unsigned int operands;
317 
318     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
319        of given register, displacement, memory operands and immediate
320        operands.  */
321     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
322 
323     /* TYPES [i] is the type (see above #defines) which tells us how to
324        use OP[i] for the corresponding operand.  */
325     i386_operand_type types[MAX_OPERANDS];
326 
327     /* Displacement expression, immediate expression, or register for each
328        operand.  */
329     union i386_op op[MAX_OPERANDS];
330 
331     /* Flags for operands.  */
332     unsigned int flags[MAX_OPERANDS];
333 #define Operand_PCrel 1
334 #define Operand_Mem   2
335 
336     /* Relocation type for operand */
337     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
338 
339     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
340        the base index byte below.  */
341     const reg_entry *base_reg;
342     const reg_entry *index_reg;
343     unsigned int log2_scale_factor;
344 
345     /* SEG gives the seg_entries of this insn.  They are zero unless
346        explicit segment overrides are given.  */
347     const seg_entry *seg[2];
348 
349     /* Copied first memory operand string, for re-checking.  */
350     char *memop1_string;
351 
352     /* PREFIX holds all the given prefix opcodes (usually null).
353        PREFIXES is the number of prefix opcodes.  */
354     unsigned int prefixes;
355     unsigned char prefix[MAX_PREFIXES];
356 
357     /* The operand to a branch insn indicates an absolute branch.  */
358     bfd_boolean jumpabsolute;
359 
360     /* Has MMX register operands.  */
361     bfd_boolean has_regmmx;
362 
363     /* Has XMM register operands.  */
364     bfd_boolean has_regxmm;
365 
366     /* Has YMM register operands.  */
367     bfd_boolean has_regymm;
368 
369     /* Has ZMM register operands.  */
370     bfd_boolean has_regzmm;
371 
372     /* Has GOTPC or TLS relocation.  */
373     bfd_boolean has_gotpc_tls_reloc;
374 
375     /* RM and SIB are the modrm byte and the sib byte where the
376        addressing modes of this insn are encoded.  */
377     modrm_byte rm;
378     rex_byte rex;
379     rex_byte vrex;
380     sib_byte sib;
381     vex_prefix vex;
382 
383     /* Masking attributes.  */
384     struct Mask_Operation *mask;
385 
386     /* Rounding control and SAE attributes.  */
387     struct RC_Operation *rounding;
388 
389     /* Broadcasting attributes.  */
390     struct Broadcast_Operation *broadcast;
391 
392     /* Compressed disp8*N attribute.  */
393     unsigned int memshift;
394 
395     /* Prefer load or store in encoding.  */
396     enum
397       {
398 	dir_encoding_default = 0,
399 	dir_encoding_load,
400 	dir_encoding_store,
401 	dir_encoding_swap
402       } dir_encoding;
403 
404     /* Prefer 8bit or 32bit displacement in encoding.  */
405     enum
406       {
407 	disp_encoding_default = 0,
408 	disp_encoding_8bit,
409 	disp_encoding_32bit
410       } disp_encoding;
411 
412     /* Prefer the REX byte in encoding.  */
413     bfd_boolean rex_encoding;
414 
415     /* Disable instruction size optimization.  */
416     bfd_boolean no_optimize;
417 
418     /* How to encode vector instructions.  */
419     enum
420       {
421 	vex_encoding_default = 0,
422 	vex_encoding_vex,
423 	vex_encoding_vex3,
424 	vex_encoding_evex
425       } vec_encoding;
426 
427     /* REP prefix.  */
428     const char *rep_prefix;
429 
430     /* HLE prefix.  */
431     const char *hle_prefix;
432 
433     /* Have BND prefix.  */
434     const char *bnd_prefix;
435 
436     /* Have NOTRACK prefix.  */
437     const char *notrack_prefix;
438 
439     /* Error message.  */
440     enum i386_error error;
441   };
442 
443 typedef struct _i386_insn i386_insn;
444 
445 /* Link RC type with corresponding string, that'll be looked for in
446    asm.  */
447 struct RC_name
448 {
449   enum rc_type type;
450   const char *name;
451   unsigned int len;
452 };
453 
454 static const struct RC_name RC_NamesTable[] =
455 {
456   {  rne, STRING_COMMA_LEN ("rn-sae") },
457   {  rd,  STRING_COMMA_LEN ("rd-sae") },
458   {  ru,  STRING_COMMA_LEN ("ru-sae") },
459   {  rz,  STRING_COMMA_LEN ("rz-sae") },
460   {  saeonly,  STRING_COMMA_LEN ("sae") },
461 };
462 
/* Characters, in addition to those in app.c:symbol_chars, that can start
   an operand.  Listing them here keeps the scrubber from eating vital
   white-space.  '@' and '?' are appended only when LEX_AT and LEX_QM,
   respectively, are defined.  */
const char extra_symbol_chars[] =
  "*%-([{}"
#ifdef LEX_AT
  "@"
#endif
#ifdef LEX_QM
  "?"
#endif
  ;
473 
#if (defined (TE_I386AIX)				\
     || ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))	\
	 && !(defined (TE_GNU)				\
	      || defined (TE_LINUX)			\
	      || defined (TE_NACL)			\
	      || defined (TE_FreeBSD)			\
	      || defined (TE_DragonFly)			\
	      || defined (TE_NetBSD))))
/* Characters that always start a comment.  They are not very useful
   when the pre-processor is disabled.  The --divide option removes '/'
   from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
/* Only '#' starts a comment here, and '/' separates prefixes.  */
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif
493 
/* Characters that start a comment only when they appear at the
   beginning of a line.  If a line looks like '# 123 filename', .line and
   .file directives will appear in the pre-processed output.
   input_file.c hand checks for '#' at the beginning of the first line of
   the input file, because the compiler outputs #NO_APP at the beginning
   of its output.
   Comments started like this one always work if '/' isn't otherwise
   defined.  */
const char line_comment_chars[] = "#/";

/* Characters that separate statements on one line.  */
const char line_separator_chars[] = ";";

/* Characters that can separate the mantissa from the exponent in
   floating point numbers.  */
const char EXP_CHARS[] = "eE";

/* Characters that mark a number as a floating point constant, as in
   0f12.456 or 0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxX";
514 
/* Tables for lexical analysis.  Each has one entry per possible
   unsigned char value.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];
static char identifier_chars[256];
static char digit_chars[256];

/* Lexical macros.  The argument is parenthesized before it is converted
   to unsigned char: a cast binds tighter than any binary operator, so
   without the parentheses an argument such as `c + 1' would have only
   `c' converted and the resulting index could fall outside the 256-entry
   table.  */
#define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) (x)])
#define is_operand_char(x) (operand_chars[(unsigned char) (x)])
#define is_register_char(x) (register_chars[(unsigned char) (x)])
#define is_space_char(x) ((x) == ' ')
#define is_identifier_char(x) (identifier_chars[(unsigned char) (x)])
#define is_digit_char(x) (digit_chars[(unsigned char) (x)])

/* All non-digit non-letter characters that may occur in an operand.  */
static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
532 
/* md_assemble() always leaves the strings it is passed unaltered.  To
   achieve that, every character overwritten with '\0' (to mark the end
   of a sub-field of the assembler instruction) is first pushed on this
   stack, and popped again when the original character is restored.  */
static char save_stack[32];
static char *save_stack_p;

/* Push the character at S and terminate the string there.  */
#define END_STRING_AND_SAVE(s) \
	do { *save_stack_p = *(s); ++save_stack_p; *(s) = '\0'; } while (0)

/* Pop the most recently saved character back into S.  */
#define RESTORE_END_STRING(s) \
	do { --save_stack_p; *(s) = *save_stack_p; } while (0)
543 
544 /* The instruction we're assembling.  */
545 static i386_insn i;
546 
547 /* Possible templates for current insn.  */
548 static const templates *current_templates;
549 
550 /* Per instruction expressionS buffers: max displacements & immediates.  */
551 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
552 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
553 
554 /* Current operand we are working on.  */
555 static int this_operand = -1;
556 
/* Code size modes.  The FLAG_CODE variable records which of them is in
   use.  */
enum flag_code
{
  CODE_32BIT,
  CODE_16BIT,
  CODE_64BIT
};

static enum flag_code flag_code;
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;

/* The __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;

#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif

#if defined (TE_PE) || defined (TE_PEP)
/* Non-zero to use the big object file format.  */
static int use_big_obj = 0;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* 1 when generating code for a shared library.  */
static int shared = 0;
#endif
596 
597 /* 1 for intel syntax,
598    0 if att syntax.  */
599 static int intel_syntax = 0;
600 
601 /* 1 for Intel64 ISA,
602    0 if AMD64 ISA.  */
603 static int intel64;
604 
605 /* 1 for intel mnemonic,
606    0 if att mnemonic.  */
607 static int intel_mnemonic = !SYSV386_COMPAT;
608 
/* 1 when pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 when the register prefix % is not required.  */
static int allow_naked_reg = 0;

/* 1 when the assembler should add a BND prefix to every
   control-transferring instruction that supports it, even where the
   prefix wasn't specified explicitly.  */
static int add_bnd_prefix = 0;

/* 1 when the pseudo index register, eiz/riz, is allowed.  */
static int allow_index_reg = 0;

/* 1 when the assembler should ignore the LOCK prefix, even where it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 when the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;
630 
631 /* Type of the previous instruction.  */
632 static struct
633   {
634     segT seg;
635     const char *file;
636     const char *name;
637     unsigned int line;
638     enum last_insn_kind
639       {
640 	last_insn_other = 0,
641 	last_insn_directive,
642 	last_insn_prefix
643       } kind;
644   } last_insn;
645 
646 /* 1 if the assembler should generate relax relocations.  */
647 
648 static int generate_relax_relocations
649   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
650 
/* Severity levels for sse_check and operand_check.  sse_check starts out
   as check_none, operand_check as check_warning.  */
static enum check_kind
{
  check_none = 0,
  check_warning,
  check_error
}
sse_check, operand_check = check_warning;
658 
/* Non-zero when branches should be aligned within a power of 2
   boundary.  */
static int align_branch_power = 0;

/* Kinds of branches that can be aligned.  */
enum align_branch_kind
{
  align_branch_none = 0,
  align_branch_jcc = 1,
  align_branch_fused = 2,
  align_branch_jmp = 3,
  align_branch_call = 4,
  align_branch_indirect = 5,
  align_branch_ret = 6
};

/* One bit per branch kind: bit N stands for the kind whose
   align_branch_kind value is N.  */
enum align_branch_bit
{
  align_branch_jcc_bit = 1 << align_branch_jcc,
  align_branch_fused_bit = 1 << align_branch_fused,
  align_branch_jmp_bit = 1 << align_branch_jmp,
  align_branch_call_bit = 1 << align_branch_call,
  align_branch_indirect_bit = 1 << align_branch_indirect,
  align_branch_ret_bit = 1 << align_branch_ret
};

/* Set of branch kinds to align, as a mask of align_branch_bit values.
   Initially jcc, fused and jmp.  */
static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);

/* Maximum padding size for a fused jcc.  A CMP like instruction can be
   9 bytes and a jcc 6 bytes; the remainder leaves room for prefixes,
   just in case.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* Maximum number of prefixes added for an instruction.  */
static unsigned int align_branch_prefix_size = 5;
696 
/* Optimization level:
   1. Clear the REX_W bit with register operand if possible.
   2. As 1, plus use a 128bit vector instruction to clear the full vector
      register.  */
static int optimize = 0;

/* Optimization level when optimizing for space:
   1. Clear the REX_W bit with register operand if possible.
   2. As 1, plus use a 128bit vector instruction to clear the full vector
      register.
   3. As 2, plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".  */
static int optimize_for_space = 0;

/* The register prefix shown in error messages.  */
static const char *register_prefix = "%";

/* In 16 bit gcc mode this is used to add an l suffix to call, ret,
   enter, leave, push, and pop instructions, so that gcc gets the same
   stack frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;
726 
727 /* CPU name.  */
728 static const char *cpu_arch_name = NULL;
729 static char *cpu_sub_arch_name = NULL;
730 
731 /* CPU feature flags.  */
732 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
733 
734 /* If we have selected a cpu we are generating instructions for.  */
735 static int cpu_arch_tune_set = 0;
736 
737 /* Cpu we are generating instructions for.  */
738 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
739 
740 /* CPU feature flags of cpu we are generating instructions for.  */
741 static i386_cpu_flags cpu_arch_tune_flags;
742 
743 /* CPU instruction set architecture used.  */
744 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
745 
746 /* CPU feature flags of instruction set architecture used.  */
747 i386_cpu_flags cpu_arch_isa_flags;
748 
/* When set, conditional jumps are not automatically promoted to handle
   an offset larger than a byte.  */
static unsigned int no_cond_jump_promotion = 0;

/* Encode SSE instructions with a VEX prefix.  */
static unsigned int sse2avx;

/* Vector length used to encode scalar AVX instructions.  */
static enum
{
  vex128 = 0,
  vex256
} avxscalar;

/* vex.w value used to encode VEX WIG instructions.  */
static enum
{
  vexw0 = 0,
  vexw1
} vexwig;

/* Vector length used to encode scalar EVEX LIG instructions.  */
static enum
{
  evexl128 = 0,
  evexl256,
  evexl512
} evexlig;

/* evex.w value used to encode EVEX WIG instructions.  */
static enum
{
  evexw0 = 0,
  evexw1
} evexwig;
784 
785 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
786 static enum rc_type evexrcig = rne;
787 
788 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
789 static symbolS *GOT_symbol;
790 
791 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
792 unsigned int x86_dwarf2_return_column;
793 
794 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
795 int x86_cie_data_alignment;
796 
/* Interface to relax_segment.
   386 jump insns have 3 major relax states, because the different types
   of jumps add different sizes to frags while we work out which sort of
   jump is needed to reach a given label.

   BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
   branches; they are handled by md_estimate_size_before_relax() and
   i386_generic_table_relax_frag().  */

/* Relax state types.  */
#define UNCOND_JUMP		0
#define COND_JUMP		1
#define COND_JUMP86		2
#define BRANCH_PADDING		3
#define BRANCH_PREFIX		4
#define FUSED_JCC_PADDING	5

/* Relax state sizes.  CODE16 is a bit that is or'ed into SMALL or BIG.  */
#define SMALL	0
#define BIG	2
#define CODE16	1
#define SMALL16 (SMALL | CODE16)
#define BIG16	(BIG | CODE16)

#ifndef INLINE
#ifdef __GNUC__
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

/* A relax state keeps the size in its low two bits and the type in the
   bits above them.  */
#define ENCODE_RELAX_STATE(type, size) \
  ((relax_substateT) ((size) | ((type) << 2)))
#define TYPE_FROM_RELAX_STATE(s) \
  ((s) >> 2)
/* Displacement size in bytes: 4 for BIG, 2 for BIG16, otherwise 1.  */
#define DISP_SIZE_FROM_RELAX_STATE(s) \
  (((s) & 3) == BIG16 ? 2 : (((s) & 3) == BIG ? 4 : 1))
835 
836 /* This table is used by relax_frag to promote short jumps to long
837    ones where necessary.  SMALL (short) jumps may be promoted to BIG
838    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
839    don't allow a short jump in a 32 bit code segment to be promoted to
840    a 16 bit offset jump because it's slower (requires data size
841    prefix), and doesn't work, unless the destination is in the bottom
842    64k of the code segment (The top 16 bits of eip are zeroed).  */
843 
844 const relax_typeS md_relax_table[] =
845 {
846   /* The fields are:
847      1) most positive reach of this state,
848      2) most negative reach of this state,
849      3) how many bytes this mode will have in the variable part of the frag
850      4) which index into the table to try if we can't fit into this one.  */
851 
852   /* UNCOND_JUMP states.  */
853   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
854   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
855   /* dword jmp adds 4 bytes to frag:
856      0 extra opcode bytes, 4 displacement bytes.  */
857   {0, 0, 4, 0},
858   /* word jmp adds 2 byte2 to frag:
859      0 extra opcode bytes, 2 displacement bytes.  */
860   {0, 0, 2, 0},
861 
862   /* COND_JUMP states.  */
863   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
864   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
865   /* dword conditionals adds 5 bytes to frag:
866      1 extra opcode byte, 4 displacement bytes.  */
867   {0, 0, 5, 0},
868   /* word conditionals add 3 bytes to frag:
869      1 extra opcode byte, 2 displacement bytes.  */
870   {0, 0, 3, 0},
871 
872   /* COND_JUMP86 states.  */
873   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
874   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
875   /* dword conditionals adds 5 bytes to frag:
876      1 extra opcode byte, 4 displacement bytes.  */
877   {0, 0, 5, 0},
878   /* word conditionals add 4 bytes to frag:
879      1 displacement byte and a 3 byte long branch insn.  */
880   {0, 0, 4, 0}
881 };
882 
883 static const arch_entry cpu_arch[] =
884 {
885   /* Do not replace the first two entries - i386_target_format()
886      relies on them being there in this order.  */
887   { STRING_COMMA_LEN ("generic32"), PROCESSOR_GENERIC32,
888     CPU_GENERIC32_FLAGS, 0 },
889   { STRING_COMMA_LEN ("generic64"), PROCESSOR_GENERIC64,
890     CPU_GENERIC64_FLAGS, 0 },
891   { STRING_COMMA_LEN ("i8086"), PROCESSOR_UNKNOWN,
892     CPU_NONE_FLAGS, 0 },
893   { STRING_COMMA_LEN ("i186"), PROCESSOR_UNKNOWN,
894     CPU_I186_FLAGS, 0 },
895   { STRING_COMMA_LEN ("i286"), PROCESSOR_UNKNOWN,
896     CPU_I286_FLAGS, 0 },
897   { STRING_COMMA_LEN ("i386"), PROCESSOR_I386,
898     CPU_I386_FLAGS, 0 },
899   { STRING_COMMA_LEN ("i486"), PROCESSOR_I486,
900     CPU_I486_FLAGS, 0 },
901   { STRING_COMMA_LEN ("i586"), PROCESSOR_PENTIUM,
902     CPU_I586_FLAGS, 0 },
903   { STRING_COMMA_LEN ("i686"), PROCESSOR_PENTIUMPRO,
904     CPU_I686_FLAGS, 0 },
905   { STRING_COMMA_LEN ("pentium"), PROCESSOR_PENTIUM,
906     CPU_I586_FLAGS, 0 },
907   { STRING_COMMA_LEN ("pentiumpro"), PROCESSOR_PENTIUMPRO,
908     CPU_PENTIUMPRO_FLAGS, 0 },
909   { STRING_COMMA_LEN ("pentiumii"), PROCESSOR_PENTIUMPRO,
910     CPU_P2_FLAGS, 0 },
911   { STRING_COMMA_LEN ("pentiumiii"),PROCESSOR_PENTIUMPRO,
912     CPU_P3_FLAGS, 0 },
913   { STRING_COMMA_LEN ("pentium4"), PROCESSOR_PENTIUM4,
914     CPU_P4_FLAGS, 0 },
915   { STRING_COMMA_LEN ("prescott"), PROCESSOR_NOCONA,
916     CPU_CORE_FLAGS, 0 },
917   { STRING_COMMA_LEN ("nocona"), PROCESSOR_NOCONA,
918     CPU_NOCONA_FLAGS, 0 },
919   { STRING_COMMA_LEN ("yonah"), PROCESSOR_CORE,
920     CPU_CORE_FLAGS, 1 },
921   { STRING_COMMA_LEN ("core"), PROCESSOR_CORE,
922     CPU_CORE_FLAGS, 0 },
923   { STRING_COMMA_LEN ("merom"), PROCESSOR_CORE2,
924     CPU_CORE2_FLAGS, 1 },
925   { STRING_COMMA_LEN ("core2"), PROCESSOR_CORE2,
926     CPU_CORE2_FLAGS, 0 },
927   { STRING_COMMA_LEN ("corei7"), PROCESSOR_COREI7,
928     CPU_COREI7_FLAGS, 0 },
929   { STRING_COMMA_LEN ("l1om"), PROCESSOR_L1OM,
930     CPU_L1OM_FLAGS, 0 },
931   { STRING_COMMA_LEN ("k1om"), PROCESSOR_K1OM,
932     CPU_K1OM_FLAGS, 0 },
933   { STRING_COMMA_LEN ("iamcu"), PROCESSOR_IAMCU,
934     CPU_IAMCU_FLAGS, 0 },
935   { STRING_COMMA_LEN ("k6"), PROCESSOR_K6,
936     CPU_K6_FLAGS, 0 },
937   { STRING_COMMA_LEN ("k6_2"), PROCESSOR_K6,
938     CPU_K6_2_FLAGS, 0 },
939   { STRING_COMMA_LEN ("athlon"), PROCESSOR_ATHLON,
940     CPU_ATHLON_FLAGS, 0 },
941   { STRING_COMMA_LEN ("sledgehammer"), PROCESSOR_K8,
942     CPU_K8_FLAGS, 1 },
943   { STRING_COMMA_LEN ("opteron"), PROCESSOR_K8,
944     CPU_K8_FLAGS, 0 },
945   { STRING_COMMA_LEN ("k8"), PROCESSOR_K8,
946     CPU_K8_FLAGS, 0 },
947   { STRING_COMMA_LEN ("amdfam10"), PROCESSOR_AMDFAM10,
948     CPU_AMDFAM10_FLAGS, 0 },
949   { STRING_COMMA_LEN ("bdver1"), PROCESSOR_BD,
950     CPU_BDVER1_FLAGS, 0 },
951   { STRING_COMMA_LEN ("bdver2"), PROCESSOR_BD,
952     CPU_BDVER2_FLAGS, 0 },
953   { STRING_COMMA_LEN ("bdver3"), PROCESSOR_BD,
954     CPU_BDVER3_FLAGS, 0 },
955   { STRING_COMMA_LEN ("bdver4"), PROCESSOR_BD,
956     CPU_BDVER4_FLAGS, 0 },
957   { STRING_COMMA_LEN ("znver1"), PROCESSOR_ZNVER,
958     CPU_ZNVER1_FLAGS, 0 },
959   { STRING_COMMA_LEN ("znver2"), PROCESSOR_ZNVER,
960     CPU_ZNVER2_FLAGS, 0 },
961   { STRING_COMMA_LEN ("btver1"), PROCESSOR_BT,
962     CPU_BTVER1_FLAGS, 0 },
963   { STRING_COMMA_LEN ("btver2"), PROCESSOR_BT,
964     CPU_BTVER2_FLAGS, 0 },
965   { STRING_COMMA_LEN (".8087"), PROCESSOR_UNKNOWN,
966     CPU_8087_FLAGS, 0 },
967   { STRING_COMMA_LEN (".287"), PROCESSOR_UNKNOWN,
968     CPU_287_FLAGS, 0 },
969   { STRING_COMMA_LEN (".387"), PROCESSOR_UNKNOWN,
970     CPU_387_FLAGS, 0 },
971   { STRING_COMMA_LEN (".687"), PROCESSOR_UNKNOWN,
972     CPU_687_FLAGS, 0 },
973   { STRING_COMMA_LEN (".cmov"), PROCESSOR_UNKNOWN,
974     CPU_CMOV_FLAGS, 0 },
975   { STRING_COMMA_LEN (".fxsr"), PROCESSOR_UNKNOWN,
976     CPU_FXSR_FLAGS, 0 },
977   { STRING_COMMA_LEN (".mmx"), PROCESSOR_UNKNOWN,
978     CPU_MMX_FLAGS, 0 },
979   { STRING_COMMA_LEN (".sse"), PROCESSOR_UNKNOWN,
980     CPU_SSE_FLAGS, 0 },
981   { STRING_COMMA_LEN (".sse2"), PROCESSOR_UNKNOWN,
982     CPU_SSE2_FLAGS, 0 },
983   { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN,
984     CPU_SSE3_FLAGS, 0 },
985   { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN,
986     CPU_SSSE3_FLAGS, 0 },
987   { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN,
988     CPU_SSE4_1_FLAGS, 0 },
989   { STRING_COMMA_LEN (".sse4.2"), PROCESSOR_UNKNOWN,
990     CPU_SSE4_2_FLAGS, 0 },
991   { STRING_COMMA_LEN (".sse4"), PROCESSOR_UNKNOWN,
992     CPU_SSE4_2_FLAGS, 0 },
993   { STRING_COMMA_LEN (".avx"), PROCESSOR_UNKNOWN,
994     CPU_AVX_FLAGS, 0 },
995   { STRING_COMMA_LEN (".avx2"), PROCESSOR_UNKNOWN,
996     CPU_AVX2_FLAGS, 0 },
997   { STRING_COMMA_LEN (".avx512f"), PROCESSOR_UNKNOWN,
998     CPU_AVX512F_FLAGS, 0 },
999   { STRING_COMMA_LEN (".avx512cd"), PROCESSOR_UNKNOWN,
1000     CPU_AVX512CD_FLAGS, 0 },
1001   { STRING_COMMA_LEN (".avx512er"), PROCESSOR_UNKNOWN,
1002     CPU_AVX512ER_FLAGS, 0 },
1003   { STRING_COMMA_LEN (".avx512pf"), PROCESSOR_UNKNOWN,
1004     CPU_AVX512PF_FLAGS, 0 },
1005   { STRING_COMMA_LEN (".avx512dq"), PROCESSOR_UNKNOWN,
1006     CPU_AVX512DQ_FLAGS, 0 },
1007   { STRING_COMMA_LEN (".avx512bw"), PROCESSOR_UNKNOWN,
1008     CPU_AVX512BW_FLAGS, 0 },
1009   { STRING_COMMA_LEN (".avx512vl"), PROCESSOR_UNKNOWN,
1010     CPU_AVX512VL_FLAGS, 0 },
1011   { STRING_COMMA_LEN (".vmx"), PROCESSOR_UNKNOWN,
1012     CPU_VMX_FLAGS, 0 },
1013   { STRING_COMMA_LEN (".vmfunc"), PROCESSOR_UNKNOWN,
1014     CPU_VMFUNC_FLAGS, 0 },
1015   { STRING_COMMA_LEN (".smx"), PROCESSOR_UNKNOWN,
1016     CPU_SMX_FLAGS, 0 },
1017   { STRING_COMMA_LEN (".xsave"), PROCESSOR_UNKNOWN,
1018     CPU_XSAVE_FLAGS, 0 },
1019   { STRING_COMMA_LEN (".xsaveopt"), PROCESSOR_UNKNOWN,
1020     CPU_XSAVEOPT_FLAGS, 0 },
1021   { STRING_COMMA_LEN (".xsavec"), PROCESSOR_UNKNOWN,
1022     CPU_XSAVEC_FLAGS, 0 },
1023   { STRING_COMMA_LEN (".xsaves"), PROCESSOR_UNKNOWN,
1024     CPU_XSAVES_FLAGS, 0 },
1025   { STRING_COMMA_LEN (".aes"), PROCESSOR_UNKNOWN,
1026     CPU_AES_FLAGS, 0 },
1027   { STRING_COMMA_LEN (".pclmul"), PROCESSOR_UNKNOWN,
1028     CPU_PCLMUL_FLAGS, 0 },
1029   { STRING_COMMA_LEN (".clmul"), PROCESSOR_UNKNOWN,
1030     CPU_PCLMUL_FLAGS, 1 },
1031   { STRING_COMMA_LEN (".fsgsbase"), PROCESSOR_UNKNOWN,
1032     CPU_FSGSBASE_FLAGS, 0 },
1033   { STRING_COMMA_LEN (".rdrnd"), PROCESSOR_UNKNOWN,
1034     CPU_RDRND_FLAGS, 0 },
1035   { STRING_COMMA_LEN (".f16c"), PROCESSOR_UNKNOWN,
1036     CPU_F16C_FLAGS, 0 },
1037   { STRING_COMMA_LEN (".bmi2"), PROCESSOR_UNKNOWN,
1038     CPU_BMI2_FLAGS, 0 },
1039   { STRING_COMMA_LEN (".fma"), PROCESSOR_UNKNOWN,
1040     CPU_FMA_FLAGS, 0 },
1041   { STRING_COMMA_LEN (".fma4"), PROCESSOR_UNKNOWN,
1042     CPU_FMA4_FLAGS, 0 },
1043   { STRING_COMMA_LEN (".xop"), PROCESSOR_UNKNOWN,
1044     CPU_XOP_FLAGS, 0 },
1045   { STRING_COMMA_LEN (".lwp"), PROCESSOR_UNKNOWN,
1046     CPU_LWP_FLAGS, 0 },
1047   { STRING_COMMA_LEN (".movbe"), PROCESSOR_UNKNOWN,
1048     CPU_MOVBE_FLAGS, 0 },
1049   { STRING_COMMA_LEN (".cx16"), PROCESSOR_UNKNOWN,
1050     CPU_CX16_FLAGS, 0 },
1051   { STRING_COMMA_LEN (".ept"), PROCESSOR_UNKNOWN,
1052     CPU_EPT_FLAGS, 0 },
1053   { STRING_COMMA_LEN (".lzcnt"), PROCESSOR_UNKNOWN,
1054     CPU_LZCNT_FLAGS, 0 },
1055   { STRING_COMMA_LEN (".hle"), PROCESSOR_UNKNOWN,
1056     CPU_HLE_FLAGS, 0 },
1057   { STRING_COMMA_LEN (".rtm"), PROCESSOR_UNKNOWN,
1058     CPU_RTM_FLAGS, 0 },
1059   { STRING_COMMA_LEN (".invpcid"), PROCESSOR_UNKNOWN,
1060     CPU_INVPCID_FLAGS, 0 },
1061   { STRING_COMMA_LEN (".clflush"), PROCESSOR_UNKNOWN,
1062     CPU_CLFLUSH_FLAGS, 0 },
1063   { STRING_COMMA_LEN (".nop"), PROCESSOR_UNKNOWN,
1064     CPU_NOP_FLAGS, 0 },
1065   { STRING_COMMA_LEN (".syscall"), PROCESSOR_UNKNOWN,
1066     CPU_SYSCALL_FLAGS, 0 },
1067   { STRING_COMMA_LEN (".rdtscp"), PROCESSOR_UNKNOWN,
1068     CPU_RDTSCP_FLAGS, 0 },
1069   { STRING_COMMA_LEN (".3dnow"), PROCESSOR_UNKNOWN,
1070     CPU_3DNOW_FLAGS, 0 },
1071   { STRING_COMMA_LEN (".3dnowa"), PROCESSOR_UNKNOWN,
1072     CPU_3DNOWA_FLAGS, 0 },
1073   { STRING_COMMA_LEN (".padlock"), PROCESSOR_UNKNOWN,
1074     CPU_PADLOCK_FLAGS, 0 },
1075   { STRING_COMMA_LEN (".pacifica"), PROCESSOR_UNKNOWN,
1076     CPU_SVME_FLAGS, 1 },
1077   { STRING_COMMA_LEN (".svme"), PROCESSOR_UNKNOWN,
1078     CPU_SVME_FLAGS, 0 },
1079   { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN,
1080     CPU_SSE4A_FLAGS, 0 },
1081   { STRING_COMMA_LEN (".abm"), PROCESSOR_UNKNOWN,
1082     CPU_ABM_FLAGS, 0 },
1083   { STRING_COMMA_LEN (".bmi"), PROCESSOR_UNKNOWN,
1084     CPU_BMI_FLAGS, 0 },
1085   { STRING_COMMA_LEN (".tbm"), PROCESSOR_UNKNOWN,
1086     CPU_TBM_FLAGS, 0 },
1087   { STRING_COMMA_LEN (".adx"), PROCESSOR_UNKNOWN,
1088     CPU_ADX_FLAGS, 0 },
1089   { STRING_COMMA_LEN (".rdseed"), PROCESSOR_UNKNOWN,
1090     CPU_RDSEED_FLAGS, 0 },
1091   { STRING_COMMA_LEN (".prfchw"), PROCESSOR_UNKNOWN,
1092     CPU_PRFCHW_FLAGS, 0 },
1093   { STRING_COMMA_LEN (".smap"), PROCESSOR_UNKNOWN,
1094     CPU_SMAP_FLAGS, 0 },
1095   { STRING_COMMA_LEN (".mpx"), PROCESSOR_UNKNOWN,
1096     CPU_MPX_FLAGS, 0 },
1097   { STRING_COMMA_LEN (".sha"), PROCESSOR_UNKNOWN,
1098     CPU_SHA_FLAGS, 0 },
1099   { STRING_COMMA_LEN (".clflushopt"), PROCESSOR_UNKNOWN,
1100     CPU_CLFLUSHOPT_FLAGS, 0 },
1101   { STRING_COMMA_LEN (".prefetchwt1"), PROCESSOR_UNKNOWN,
1102     CPU_PREFETCHWT1_FLAGS, 0 },
1103   { STRING_COMMA_LEN (".se1"), PROCESSOR_UNKNOWN,
1104     CPU_SE1_FLAGS, 0 },
1105   { STRING_COMMA_LEN (".clwb"), PROCESSOR_UNKNOWN,
1106     CPU_CLWB_FLAGS, 0 },
1107   { STRING_COMMA_LEN (".avx512ifma"), PROCESSOR_UNKNOWN,
1108     CPU_AVX512IFMA_FLAGS, 0 },
1109   { STRING_COMMA_LEN (".avx512vbmi"), PROCESSOR_UNKNOWN,
1110     CPU_AVX512VBMI_FLAGS, 0 },
1111   { STRING_COMMA_LEN (".avx512_4fmaps"), PROCESSOR_UNKNOWN,
1112     CPU_AVX512_4FMAPS_FLAGS, 0 },
1113   { STRING_COMMA_LEN (".avx512_4vnniw"), PROCESSOR_UNKNOWN,
1114     CPU_AVX512_4VNNIW_FLAGS, 0 },
1115   { STRING_COMMA_LEN (".avx512_vpopcntdq"), PROCESSOR_UNKNOWN,
1116     CPU_AVX512_VPOPCNTDQ_FLAGS, 0 },
1117   { STRING_COMMA_LEN (".avx512_vbmi2"), PROCESSOR_UNKNOWN,
1118     CPU_AVX512_VBMI2_FLAGS, 0 },
1119   { STRING_COMMA_LEN (".avx512_vnni"), PROCESSOR_UNKNOWN,
1120     CPU_AVX512_VNNI_FLAGS, 0 },
1121   { STRING_COMMA_LEN (".avx512_bitalg"), PROCESSOR_UNKNOWN,
1122     CPU_AVX512_BITALG_FLAGS, 0 },
1123   { STRING_COMMA_LEN (".clzero"), PROCESSOR_UNKNOWN,
1124     CPU_CLZERO_FLAGS, 0 },
1125   { STRING_COMMA_LEN (".mwaitx"), PROCESSOR_UNKNOWN,
1126     CPU_MWAITX_FLAGS, 0 },
1127   { STRING_COMMA_LEN (".ospke"), PROCESSOR_UNKNOWN,
1128     CPU_OSPKE_FLAGS, 0 },
1129   { STRING_COMMA_LEN (".rdpid"), PROCESSOR_UNKNOWN,
1130     CPU_RDPID_FLAGS, 0 },
1131   { STRING_COMMA_LEN (".ptwrite"), PROCESSOR_UNKNOWN,
1132     CPU_PTWRITE_FLAGS, 0 },
1133   { STRING_COMMA_LEN (".ibt"), PROCESSOR_UNKNOWN,
1134     CPU_IBT_FLAGS, 0 },
1135   { STRING_COMMA_LEN (".shstk"), PROCESSOR_UNKNOWN,
1136     CPU_SHSTK_FLAGS, 0 },
1137   { STRING_COMMA_LEN (".gfni"), PROCESSOR_UNKNOWN,
1138     CPU_GFNI_FLAGS, 0 },
1139   { STRING_COMMA_LEN (".vaes"), PROCESSOR_UNKNOWN,
1140     CPU_VAES_FLAGS, 0 },
1141   { STRING_COMMA_LEN (".vpclmulqdq"), PROCESSOR_UNKNOWN,
1142     CPU_VPCLMULQDQ_FLAGS, 0 },
1143   { STRING_COMMA_LEN (".wbnoinvd"), PROCESSOR_UNKNOWN,
1144     CPU_WBNOINVD_FLAGS, 0 },
1145   { STRING_COMMA_LEN (".pconfig"), PROCESSOR_UNKNOWN,
1146     CPU_PCONFIG_FLAGS, 0 },
1147   { STRING_COMMA_LEN (".waitpkg"), PROCESSOR_UNKNOWN,
1148     CPU_WAITPKG_FLAGS, 0 },
1149   { STRING_COMMA_LEN (".cldemote"), PROCESSOR_UNKNOWN,
1150     CPU_CLDEMOTE_FLAGS, 0 },
1151   { STRING_COMMA_LEN (".movdiri"), PROCESSOR_UNKNOWN,
1152     CPU_MOVDIRI_FLAGS, 0 },
1153   { STRING_COMMA_LEN (".movdir64b"), PROCESSOR_UNKNOWN,
1154     CPU_MOVDIR64B_FLAGS, 0 },
1155   { STRING_COMMA_LEN (".avx512_bf16"), PROCESSOR_UNKNOWN,
1156     CPU_AVX512_BF16_FLAGS, 0 },
1157   { STRING_COMMA_LEN (".avx512_vp2intersect"), PROCESSOR_UNKNOWN,
1158     CPU_AVX512_VP2INTERSECT_FLAGS, 0 },
1159   { STRING_COMMA_LEN (".enqcmd"), PROCESSOR_UNKNOWN,
1160     CPU_ENQCMD_FLAGS, 0 },
1161   { STRING_COMMA_LEN (".rdpru"), PROCESSOR_UNKNOWN,
1162     CPU_RDPRU_FLAGS, 0 },
1163   { STRING_COMMA_LEN (".mcommit"), PROCESSOR_UNKNOWN,
1164     CPU_MCOMMIT_FLAGS, 0 },
1165 };
1166 
1167 static const noarch_entry cpu_noarch[] =
1168 {
1169   { STRING_COMMA_LEN ("no87"),  CPU_ANY_X87_FLAGS },
1170   { STRING_COMMA_LEN ("no287"),  CPU_ANY_287_FLAGS },
1171   { STRING_COMMA_LEN ("no387"),  CPU_ANY_387_FLAGS },
1172   { STRING_COMMA_LEN ("no687"),  CPU_ANY_687_FLAGS },
1173   { STRING_COMMA_LEN ("nocmov"),  CPU_ANY_CMOV_FLAGS },
1174   { STRING_COMMA_LEN ("nofxsr"),  CPU_ANY_FXSR_FLAGS },
1175   { STRING_COMMA_LEN ("nommx"),  CPU_ANY_MMX_FLAGS },
1176   { STRING_COMMA_LEN ("nosse"),  CPU_ANY_SSE_FLAGS },
1177   { STRING_COMMA_LEN ("nosse2"),  CPU_ANY_SSE2_FLAGS },
1178   { STRING_COMMA_LEN ("nosse3"),  CPU_ANY_SSE3_FLAGS },
1179   { STRING_COMMA_LEN ("nossse3"),  CPU_ANY_SSSE3_FLAGS },
1180   { STRING_COMMA_LEN ("nosse4.1"),  CPU_ANY_SSE4_1_FLAGS },
1181   { STRING_COMMA_LEN ("nosse4.2"),  CPU_ANY_SSE4_2_FLAGS },
1182   { STRING_COMMA_LEN ("nosse4"),  CPU_ANY_SSE4_1_FLAGS },
1183   { STRING_COMMA_LEN ("noavx"),  CPU_ANY_AVX_FLAGS },
1184   { STRING_COMMA_LEN ("noavx2"),  CPU_ANY_AVX2_FLAGS },
1185   { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS },
1186   { STRING_COMMA_LEN ("noavx512cd"), CPU_ANY_AVX512CD_FLAGS },
1187   { STRING_COMMA_LEN ("noavx512er"), CPU_ANY_AVX512ER_FLAGS },
1188   { STRING_COMMA_LEN ("noavx512pf"), CPU_ANY_AVX512PF_FLAGS },
1189   { STRING_COMMA_LEN ("noavx512dq"), CPU_ANY_AVX512DQ_FLAGS },
1190   { STRING_COMMA_LEN ("noavx512bw"), CPU_ANY_AVX512BW_FLAGS },
1191   { STRING_COMMA_LEN ("noavx512vl"), CPU_ANY_AVX512VL_FLAGS },
1192   { STRING_COMMA_LEN ("noavx512ifma"), CPU_ANY_AVX512IFMA_FLAGS },
1193   { STRING_COMMA_LEN ("noavx512vbmi"), CPU_ANY_AVX512VBMI_FLAGS },
1194   { STRING_COMMA_LEN ("noavx512_4fmaps"), CPU_ANY_AVX512_4FMAPS_FLAGS },
1195   { STRING_COMMA_LEN ("noavx512_4vnniw"), CPU_ANY_AVX512_4VNNIW_FLAGS },
1196   { STRING_COMMA_LEN ("noavx512_vpopcntdq"), CPU_ANY_AVX512_VPOPCNTDQ_FLAGS },
1197   { STRING_COMMA_LEN ("noavx512_vbmi2"), CPU_ANY_AVX512_VBMI2_FLAGS },
1198   { STRING_COMMA_LEN ("noavx512_vnni"), CPU_ANY_AVX512_VNNI_FLAGS },
1199   { STRING_COMMA_LEN ("noavx512_bitalg"), CPU_ANY_AVX512_BITALG_FLAGS },
1200   { STRING_COMMA_LEN ("noibt"), CPU_ANY_IBT_FLAGS },
1201   { STRING_COMMA_LEN ("noshstk"), CPU_ANY_SHSTK_FLAGS },
1202   { STRING_COMMA_LEN ("nomovdiri"), CPU_ANY_MOVDIRI_FLAGS },
1203   { STRING_COMMA_LEN ("nomovdir64b"), CPU_ANY_MOVDIR64B_FLAGS },
1204   { STRING_COMMA_LEN ("noavx512_bf16"), CPU_ANY_AVX512_BF16_FLAGS },
1205   { STRING_COMMA_LEN ("noavx512_vp2intersect"), CPU_ANY_SHSTK_FLAGS },
1206   { STRING_COMMA_LEN ("noenqcmd"), CPU_ANY_ENQCMD_FLAGS },
1207 };
1208 
1209 #ifdef I386COFF
1210 /* Like s_lcomm_internal in gas/read.c but the alignment string
1211    is allowed to be optional.  */
1212 
1213 static symbolS *
1214 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1215 {
1216   addressT align = 0;
1217 
1218   SKIP_WHITESPACE ();
1219 
1220   if (needs_align
1221       && *input_line_pointer == ',')
1222     {
1223       align = parse_align (needs_align - 1);
1224 
1225       if (align == (addressT) -1)
1226 	return NULL;
1227     }
1228   else
1229     {
1230       if (size >= 8)
1231 	align = 3;
1232       else if (size >= 4)
1233 	align = 2;
1234       else if (size >= 2)
1235 	align = 1;
1236       else
1237 	align = 0;
1238     }
1239 
1240   bss_alloc (symbolP, size, align);
1241   return symbolP;
1242 }
1243 
1244 static void
1245 pe_lcomm (int needs_align)
1246 {
1247   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1248 }
1249 #endif
1250 
/* Machine-dependent pseudo-op (directive) table for x86.  Each entry
   holds the directive name, its handler and an integer stored with
   it; the list is terminated by an all-zero entry.
   NOTE(review): the integer is presumably passed to the handler when
   the directive is seen (e.g. CODE_16BIT for "code16") -- confirm
   against the generic pseudo-op dispatcher.  */
const pseudo_typeS md_pseudo_table[] =
{
  /* "align" uses s_align_bytes unless OBJ_AOUT or USE_ALIGN_PTWO is
     defined, in which case s_align_ptwo is used.  */
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
  /* COFF builds replace "bss" with the PE flavour of "lcomm".  */
#ifndef I386COFF
  {"bss", s_bss, 0},
#else
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  /* "noopt" and "optim" are accepted and ignored.  */
  {"noopt", s_ignore, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
  /* "code64" exists only in BFD64 builds.  */
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  /* The following pairs share one handler each; the integer selects
     the variant (1 / 0).  */
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
  /* ELF (or maybe-ELF) builds get "largecomm"; all other builds get
     the DWARF2 "file"/"loc"/"loc_mark_labels" directives here.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
#endif
  /* Terminator.  */
  {0, 0, 0}
};
1297 
/* For interface with expression ().  This is the parser's current
   position in the input line; pe_lcomm_internal in this file also
   reads it directly.  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static struct hash_control *op_hash;

/* Hash table for register lookup.  */
static struct hash_control *reg_hash;
1306 
/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.

   Naming: the trailing number in each array name is the length of the
   pattern in bytes.  In the *_patt tables below, entry [N - 1] is the
   N-byte pattern; a NULL entry means there is no single pattern of
   that length (i386_output_nops copes with such holes).  */
static const unsigned char f32_1[] =
  {0x90};				/* nop			*/
static const unsigned char f32_2[] =
  {0x66,0x90};				/* xchg %ax,%ax		*/
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};			/* leal 0(%esi),%esi	*/
static const unsigned char f32_4[] =
  {0x8d,0x74,0x26,0x00};		/* leal 0(%esi,1),%esi	*/
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};	/* leal 0L(%esi),%esi	*/
static const unsigned char f32_7[] =
  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};	/* leal 0L(%esi,1),%esi */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};			/* lea 0(%si),%si	*/
static const unsigned char f16_4[] =
  {0x8d,0xb4,0x00,0x00};		/* lea 0W(%si),%si	*/
/* Opcode bytes of the jumps i386_generate_nops emits in front of long
   padding; the displacement is appended by the caller.  */
static const unsigned char jump_disp8[] =
  {0xeb};				/* jmp disp8	       */
static const unsigned char jump32_disp32[] =
  {0xe9};				/* jmp disp32	       */
/* Same jump with a 0x66 prefix; used when assembling 16-bit code.  */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};				/* jmp disp32	       */
/* 32-bit NOPs patterns.  There is no 5-byte entry.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};
/* 16-bit NOPs patterns.  The 1- and 2-byte entries are shared with
   the 32-bit table.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f32_2, f16_3, f16_4
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] =
  {0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  The 1- and 2-byte entries are
   shared with f32_patt; lengths 3 to 11 use the 0x0f 0x1f forms
   above.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};
1372 
/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.

   PATT[N - 1] is the N-byte NOP pattern; an entry may be NULL, in
   which case the next smaller pattern is used.  The longest NOPs are
   written first and any remainder is filled with one shorter pattern
   (plus a one-byte NOP if the exact remainder length is missing from
   PATT).  A MAX_SINGLE_NOP_SIZE below 1 is reported via as_fatal.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)

{
  const unsigned char *insn;
  int tail;
  int bulk;
  int pos = 0;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  /* Pick the largest pattern; step down by one if the requested
     length has no pattern of its own.  */
  insn = patt[max_single_nop_size - 1];
  if (insn == NULL)
    {
      --max_single_nop_size;
      insn = patt[max_single_nop_size - 1];
    }

  /* BULK bytes are covered by full-size NOPs, TAIL bytes remain.  */
  tail = count % max_single_nop_size;
  bulk = count - tail;

  while (pos < bulk)
    {
      memcpy (where + pos, insn, max_single_nop_size);
      pos += max_single_nop_size;
    }

  if (tail == 0)
    return;

  insn = patt[tail - 1];
  if (insn != NULL)
    {
      memcpy (where + pos, insn, tail);
      return;
    }

  /* No pattern of exactly TAIL bytes: emit the next smaller one and
     pad with a single one-byte NOP.  */
  --tail;
  memcpy (where + pos, patt[tail - 1], tail);
  where[pos + tail] = *patt[0];
}
1424 
1425 static INLINE int
1426 fits_in_imm7 (offsetT num)
1427 {
1428   return (num & 0x7f) == num;
1429 }
1430 
1431 static INLINE int
1432 fits_in_imm31 (offsetT num)
1433 {
1434   return (num & 0x7fffffff) == num;
1435 }
1436 
/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
   single NOP instruction LIMIT.

   Only frags of type rs_fill_nop, rs_align_code, or
   rs_machine_dependent with a relax type of BRANCH_PADDING or
   FUSED_JCC_PADDING are handled; for any other frag nothing is
   written.  A LIMIT of 0 selects the largest single NOP of the chosen
   pattern table.  For frags that are neither rs_fill_nop nor
   rs_machine_dependent, FRAGP->fr_var is set to COUNT.

   If COUNT divided by the largest single NOP size exceeds a per-table
   threshold, a jump over the padding is written first and only the
   bytes after the jump are filled with NOPs.  Errors (LIMIT too large
   for a .nop directive, jump displacement out of range) are reported
   with as_bad_where and nothing further is written.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
	break;
      /* Fall through.  */
    default:
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit. When -mtune= is used:

     1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM,
     PROCESSOR_PENTIUMPRO, PROCESSOR_IAMCU and PROCESSOR_GENERIC32,
     f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.   */

  if (flag_code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
	{
	  /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
	  switch (cpu_arch_tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* We use cpu_arch_isa_flags to check if we SHOULD
		 optimize with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	    case PROCESSOR_L1OM:
	    case PROCESSOR_K1OM:
	    case PROCESSOR_GENERIC64:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	      patt = alt_patt;
	      break;
	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_GENERIC32:
	      patt = f32_patt;
	      break;
	    }
	}
      else
	{
	  /* An ISA was recorded for this frag: except for
	     PROCESSOR_GENERIC64, the long NOP forms are used only when
	     the frag's ISA flags include CpuNop.  */
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
		 PROCESSOR_UNKNOWN.  */
	      abort ();
	      break;

	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	    case PROCESSOR_GENERIC32:
	      /* We use cpu_arch_isa_flags to check if we CAN optimize
		 with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	    case PROCESSOR_L1OM:
	    case PROCESSOR_K1OM:
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_GENERIC64:
	      patt = alt_patt;
	      break;
	    }
	}

      if (patt == f32_patt)
	{
	  max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
	  /* Limit number of NOPs to 2 for older processors.  */
	  max_number_of_nops = 2;
	}
      else
	{
	  max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
	  /* Limit number of NOPs to 7 for newer processors.  */
	  max_number_of_nops = 7;
	}
    }

  /* No explicit limit: allow the largest NOP the table offers.  */
  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
	{
	  as_bad_where (fragP->fr_file, fragP->fr_line,
			_("invalid single nop size: %d "
			  "(expect within [0, %d])"),
			limit, max_single_nop_size);
	  return;
	}
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  */
      /* A short jump occupies 2 bytes, so it has to skip COUNT - 2.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
	{
	  /* Use "jmp disp8" if possible.  */
	  count = disp;
	  where[0] = jump_disp8[0];
	  where[1] = count;
	  where += 2;
	}
      else
	{
	  /* Number of opcode bytes in front of the 4-byte
	     displacement.  */
	  unsigned int size_of_jump;

	  if (flag_code == CODE_16BIT)
	    {
	      where[0] = jump16_disp32[0];
	      where[1] = jump16_disp32[1];
	      size_of_jump = 2;
	    }
	  else
	    {
	      where[0] = jump32_disp32[0];
	      size_of_jump = 1;
	    }

	  /* COUNT becomes both the displacement and the number of NOP
	     bytes still to be written after the jump.  */
	  count -= size_of_jump + 4;
	  if (!fits_in_imm31 (count))
	    {
	      as_bad_where (fragP->fr_file, fragP->fr_line,
			    _("jump over nop padding out of range"));
	      return;
	    }

	  md_number_to_chars (where + size_of_jump, count, 4);
	  where += size_of_jump + 4;
	}
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}
1650 
1651 static INLINE int
1652 operand_type_all_zero (const union i386_operand_type *x)
1653 {
1654   switch (ARRAY_SIZE(x->array))
1655     {
1656     case 3:
1657       if (x->array[2])
1658 	return 0;
1659       /* Fall through.  */
1660     case 2:
1661       if (x->array[1])
1662 	return 0;
1663       /* Fall through.  */
1664     case 1:
1665       return !x->array[0];
1666     default:
1667       abort ();
1668     }
1669 }
1670 
1671 static INLINE void
1672 operand_type_set (union i386_operand_type *x, unsigned int v)
1673 {
1674   switch (ARRAY_SIZE(x->array))
1675     {
1676     case 3:
1677       x->array[2] = v;
1678       /* Fall through.  */
1679     case 2:
1680       x->array[1] = v;
1681       /* Fall through.  */
1682     case 1:
1683       x->array[0] = v;
1684       /* Fall through.  */
1685       break;
1686     default:
1687       abort ();
1688     }
1689 
1690   x->bitfield.class = ClassNone;
1691   x->bitfield.instance = InstanceNone;
1692 }
1693 
1694 static INLINE int
1695 operand_type_equal (const union i386_operand_type *x,
1696 		    const union i386_operand_type *y)
1697 {
1698   switch (ARRAY_SIZE(x->array))
1699     {
1700     case 3:
1701       if (x->array[2] != y->array[2])
1702 	return 0;
1703       /* Fall through.  */
1704     case 2:
1705       if (x->array[1] != y->array[1])
1706 	return 0;
1707       /* Fall through.  */
1708     case 1:
1709       return x->array[0] == y->array[0];
1710       break;
1711     default:
1712       abort ();
1713     }
1714 }
1715 
1716 static INLINE int
1717 cpu_flags_all_zero (const union i386_cpu_flags *x)
1718 {
1719   switch (ARRAY_SIZE(x->array))
1720     {
1721     case 4:
1722       if (x->array[3])
1723 	return 0;
1724       /* Fall through.  */
1725     case 3:
1726       if (x->array[2])
1727 	return 0;
1728       /* Fall through.  */
1729     case 2:
1730       if (x->array[1])
1731 	return 0;
1732       /* Fall through.  */
1733     case 1:
1734       return !x->array[0];
1735     default:
1736       abort ();
1737     }
1738 }
1739 
1740 static INLINE int
1741 cpu_flags_equal (const union i386_cpu_flags *x,
1742 		 const union i386_cpu_flags *y)
1743 {
1744   switch (ARRAY_SIZE(x->array))
1745     {
1746     case 4:
1747       if (x->array[3] != y->array[3])
1748 	return 0;
1749       /* Fall through.  */
1750     case 3:
1751       if (x->array[2] != y->array[2])
1752 	return 0;
1753       /* Fall through.  */
1754     case 2:
1755       if (x->array[1] != y->array[1])
1756 	return 0;
1757       /* Fall through.  */
1758     case 1:
1759       return x->array[0] == y->array[0];
1760       break;
1761     default:
1762       abort ();
1763     }
1764 }
1765 
1766 static INLINE int
1767 cpu_flags_check_cpu64 (i386_cpu_flags f)
1768 {
1769   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1770 	   || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1771 }
1772 
1773 static INLINE i386_cpu_flags
1774 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1775 {
1776   switch (ARRAY_SIZE (x.array))
1777     {
1778     case 4:
1779       x.array [3] &= y.array [3];
1780       /* Fall through.  */
1781     case 3:
1782       x.array [2] &= y.array [2];
1783       /* Fall through.  */
1784     case 2:
1785       x.array [1] &= y.array [1];
1786       /* Fall through.  */
1787     case 1:
1788       x.array [0] &= y.array [0];
1789       break;
1790     default:
1791       abort ();
1792     }
1793   return x;
1794 }
1795 
1796 static INLINE i386_cpu_flags
1797 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1798 {
1799   switch (ARRAY_SIZE (x.array))
1800     {
1801     case 4:
1802       x.array [3] |= y.array [3];
1803       /* Fall through.  */
1804     case 3:
1805       x.array [2] |= y.array [2];
1806       /* Fall through.  */
1807     case 2:
1808       x.array [1] |= y.array [1];
1809       /* Fall through.  */
1810     case 1:
1811       x.array [0] |= y.array [0];
1812       break;
1813     default:
1814       abort ();
1815     }
1816   return x;
1817 }
1818 
1819 static INLINE i386_cpu_flags
1820 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1821 {
1822   switch (ARRAY_SIZE (x.array))
1823     {
1824     case 4:
1825       x.array [3] &= ~y.array [3];
1826       /* Fall through.  */
1827     case 3:
1828       x.array [2] &= ~y.array [2];
1829       /* Fall through.  */
1830     case 2:
1831       x.array [1] &= ~y.array [1];
1832       /* Fall through.  */
1833     case 1:
1834       x.array [0] &= ~y.array [0];
1835       break;
1836     default:
1837       abort ();
1838     }
1839   return x;
1840 }
1841 
1842 #define CPU_FLAGS_ARCH_MATCH		0x1
1843 #define CPU_FLAGS_64BIT_MATCH		0x2
1844 
1845 #define CPU_FLAGS_PERFECT_MATCH \
1846   (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1847 
1848 /* Return CPU flags match bits. */
1849 
1850 static int
1851 cpu_flags_match (const insn_template *t)
1852 {
1853   i386_cpu_flags x = t->cpu_flags;
1854   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1855 
1856   x.bitfield.cpu64 = 0;
1857   x.bitfield.cpuno64 = 0;
1858 
1859   if (cpu_flags_all_zero (&x))
1860     {
1861       /* This instruction is available on all archs.  */
1862       match |= CPU_FLAGS_ARCH_MATCH;
1863     }
1864   else
1865     {
1866       /* This instruction is available only on some archs.  */
1867       i386_cpu_flags cpu = cpu_arch_flags;
1868 
1869       /* AVX512VL is no standalone feature - match it and then strip it.  */
1870       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1871 	return match;
1872       x.bitfield.cpuavx512vl = 0;
1873 
1874       cpu = cpu_flags_and (x, cpu);
1875       if (!cpu_flags_all_zero (&cpu))
1876 	{
1877 	  if (x.bitfield.cpuavx)
1878 	    {
1879 	      /* We need to check a few extra flags with AVX.  */
1880 	      if (cpu.bitfield.cpuavx
1881 		  && (!t->opcode_modifier.sse2avx || sse2avx)
1882 		  && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1883 		  && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1884 		  && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1885 		match |= CPU_FLAGS_ARCH_MATCH;
1886 	    }
1887 	  else if (x.bitfield.cpuavx512f)
1888 	    {
1889 	      /* We need to check a few extra flags with AVX512F.  */
1890 	      if (cpu.bitfield.cpuavx512f
1891 		  && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1892 		  && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1893 		  && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1894 		match |= CPU_FLAGS_ARCH_MATCH;
1895 	    }
1896 	  else
1897 	    match |= CPU_FLAGS_ARCH_MATCH;
1898 	}
1899     }
1900   return match;
1901 }
1902 
1903 static INLINE i386_operand_type
1904 operand_type_and (i386_operand_type x, i386_operand_type y)
1905 {
1906   if (x.bitfield.class != y.bitfield.class)
1907     x.bitfield.class = ClassNone;
1908   if (x.bitfield.instance != y.bitfield.instance)
1909     x.bitfield.instance = InstanceNone;
1910 
1911   switch (ARRAY_SIZE (x.array))
1912     {
1913     case 3:
1914       x.array [2] &= y.array [2];
1915       /* Fall through.  */
1916     case 2:
1917       x.array [1] &= y.array [1];
1918       /* Fall through.  */
1919     case 1:
1920       x.array [0] &= y.array [0];
1921       break;
1922     default:
1923       abort ();
1924     }
1925   return x;
1926 }
1927 
1928 static INLINE i386_operand_type
1929 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1930 {
1931   gas_assert (y.bitfield.class == ClassNone);
1932   gas_assert (y.bitfield.instance == InstanceNone);
1933 
1934   switch (ARRAY_SIZE (x.array))
1935     {
1936     case 3:
1937       x.array [2] &= ~y.array [2];
1938       /* Fall through.  */
1939     case 2:
1940       x.array [1] &= ~y.array [1];
1941       /* Fall through.  */
1942     case 1:
1943       x.array [0] &= ~y.array [0];
1944       break;
1945     default:
1946       abort ();
1947     }
1948   return x;
1949 }
1950 
1951 static INLINE i386_operand_type
1952 operand_type_or (i386_operand_type x, i386_operand_type y)
1953 {
1954   gas_assert (x.bitfield.class == ClassNone ||
1955               y.bitfield.class == ClassNone ||
1956               x.bitfield.class == y.bitfield.class);
1957   gas_assert (x.bitfield.instance == InstanceNone ||
1958               y.bitfield.instance == InstanceNone ||
1959               x.bitfield.instance == y.bitfield.instance);
1960 
1961   switch (ARRAY_SIZE (x.array))
1962     {
1963     case 3:
1964       x.array [2] |= y.array [2];
1965       /* Fall through.  */
1966     case 2:
1967       x.array [1] |= y.array [1];
1968       /* Fall through.  */
1969     case 1:
1970       x.array [0] |= y.array [0];
1971       break;
1972     default:
1973       abort ();
1974     }
1975   return x;
1976 }
1977 
1978 static INLINE i386_operand_type
1979 operand_type_xor (i386_operand_type x, i386_operand_type y)
1980 {
1981   gas_assert (y.bitfield.class == ClassNone);
1982   gas_assert (y.bitfield.instance == InstanceNone);
1983 
1984   switch (ARRAY_SIZE (x.array))
1985     {
1986     case 3:
1987       x.array [2] ^= y.array [2];
1988       /* Fall through.  */
1989     case 2:
1990       x.array [1] ^= y.array [1];
1991       /* Fall through.  */
1992     case 1:
1993       x.array [0] ^= y.array [0];
1994       break;
1995     default:
1996       abort ();
1997     }
1998   return x;
1999 }
2000 
/* File-local operand type constants, each initialized from the
   OPERAND_TYPE_* initializer macro of the corresponding name.  */
static const i386_operand_type disp16 = OPERAND_TYPE_DISP16;
static const i386_operand_type disp32 = OPERAND_TYPE_DISP32;
static const i386_operand_type disp32s = OPERAND_TYPE_DISP32S;
static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
static const i386_operand_type anyimm = OPERAND_TYPE_ANYIMM;
static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
static const i386_operand_type regmask = OPERAND_TYPE_REGMASK;
static const i386_operand_type imm8 = OPERAND_TYPE_IMM8;
static const i386_operand_type imm8s = OPERAND_TYPE_IMM8S;
static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
static const i386_operand_type imm64 = OPERAND_TYPE_IMM64;
static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
2018 
/* Operand categories that operand_type_check can test for.  */
enum operand_type
{
  /* Operand class is Reg.  */
  reg,
  /* Any of the immediate size bits is set.  */
  imm,
  /* Any of the displacement size bits is set.  */
  disp,
  /* Any displacement size bit or the baseindex bit is set.  */
  anymem
};
2026 
2027 static INLINE int
2028 operand_type_check (i386_operand_type t, enum operand_type c)
2029 {
2030   switch (c)
2031     {
2032     case reg:
2033       return t.bitfield.class == Reg;
2034 
2035     case imm:
2036       return (t.bitfield.imm8
2037 	      || t.bitfield.imm8s
2038 	      || t.bitfield.imm16
2039 	      || t.bitfield.imm32
2040 	      || t.bitfield.imm32s
2041 	      || t.bitfield.imm64);
2042 
2043     case disp:
2044       return (t.bitfield.disp8
2045 	      || t.bitfield.disp16
2046 	      || t.bitfield.disp32
2047 	      || t.bitfield.disp32s
2048 	      || t.bitfield.disp64);
2049 
2050     case anymem:
2051       return (t.bitfield.disp8
2052 	      || t.bitfield.disp16
2053 	      || t.bitfield.disp32
2054 	      || t.bitfield.disp32s
2055 	      || t.bitfield.disp64
2056 	      || t.bitfield.baseindex);
2057 
2058     default:
2059       abort ();
2060     }
2061 
2062   return 0;
2063 }
2064 
2065 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2066    between operand GIVEN and opeand WANTED for instruction template T.  */
2067 
2068 static INLINE int
2069 match_operand_size (const insn_template *t, unsigned int wanted,
2070 		    unsigned int given)
2071 {
2072   return !((i.types[given].bitfield.byte
2073 	    && !t->operand_types[wanted].bitfield.byte)
2074 	   || (i.types[given].bitfield.word
2075 	       && !t->operand_types[wanted].bitfield.word)
2076 	   || (i.types[given].bitfield.dword
2077 	       && !t->operand_types[wanted].bitfield.dword)
2078 	   || (i.types[given].bitfield.qword
2079 	       && !t->operand_types[wanted].bitfield.qword)
2080 	   || (i.types[given].bitfield.tbyte
2081 	       && !t->operand_types[wanted].bitfield.tbyte));
2082 }
2083 
2084 /* Return 1 if there is no conflict in SIMD register between operand
2085    GIVEN and opeand WANTED for instruction template T.  */
2086 
2087 static INLINE int
2088 match_simd_size (const insn_template *t, unsigned int wanted,
2089 		 unsigned int given)
2090 {
2091   return !((i.types[given].bitfield.xmmword
2092 	    && !t->operand_types[wanted].bitfield.xmmword)
2093 	   || (i.types[given].bitfield.ymmword
2094 	       && !t->operand_types[wanted].bitfield.ymmword)
2095 	   || (i.types[given].bitfield.zmmword
2096 	       && !t->operand_types[wanted].bitfield.zmmword));
2097 }
2098 
2099 /* Return 1 if there is no conflict in any size between operand GIVEN
2100    and opeand WANTED for instruction template T.  */
2101 
2102 static INLINE int
2103 match_mem_size (const insn_template *t, unsigned int wanted,
2104 		unsigned int given)
2105 {
2106   return (match_operand_size (t, wanted, given)
2107 	  && !((i.types[given].bitfield.unspecified
2108 		&& !i.broadcast
2109 		&& !t->operand_types[wanted].bitfield.unspecified)
2110 	       || (i.types[given].bitfield.fword
2111 		   && !t->operand_types[wanted].bitfield.fword)
2112 	       /* For scalar opcode templates to allow register and memory
2113 		  operands at the same time, some special casing is needed
2114 		  here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2115 		  down-conversion vpmov*.  */
2116 	       || ((t->operand_types[wanted].bitfield.class == RegSIMD
2117 		    && !t->opcode_modifier.broadcast
2118 		    && (t->operand_types[wanted].bitfield.byte
2119 			|| t->operand_types[wanted].bitfield.word
2120 			|| t->operand_types[wanted].bitfield.dword
2121 			|| t->operand_types[wanted].bitfield.qword))
2122 		   ? (i.types[given].bitfield.xmmword
2123 		      || i.types[given].bitfield.ymmword
2124 		      || i.types[given].bitfield.zmmword)
2125 		   : !match_simd_size(t, wanted, given))));
2126 }
2127 
2128 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2129    operands for instruction template T, and it has MATCH_REVERSE set if there
2130    is no size conflict on any operands for the template with operands reversed
2131    (and the template allows for reversing in the first place).  */
2132 
2133 #define MATCH_STRAIGHT 1
2134 #define MATCH_REVERSE  2
2135 
2136 static INLINE unsigned int
2137 operand_size_match (const insn_template *t)
2138 {
2139   unsigned int j, match = MATCH_STRAIGHT;
2140 
2141   /* Don't check non-absolute jump instructions.  */
2142   if (t->opcode_modifier.jump
2143       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2144     return match;
2145 
2146   /* Check memory and accumulator operand size.  */
2147   for (j = 0; j < i.operands; j++)
2148     {
2149       if (i.types[j].bitfield.class != Reg
2150 	  && i.types[j].bitfield.class != RegSIMD
2151 	  && t->opcode_modifier.anysize)
2152 	continue;
2153 
2154       if (t->operand_types[j].bitfield.class == Reg
2155 	  && !match_operand_size (t, j, j))
2156 	{
2157 	  match = 0;
2158 	  break;
2159 	}
2160 
2161       if (t->operand_types[j].bitfield.class == RegSIMD
2162 	  && !match_simd_size (t, j, j))
2163 	{
2164 	  match = 0;
2165 	  break;
2166 	}
2167 
2168       if (t->operand_types[j].bitfield.instance == Accum
2169 	  && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2170 	{
2171 	  match = 0;
2172 	  break;
2173 	}
2174 
2175       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2176 	{
2177 	  match = 0;
2178 	  break;
2179 	}
2180     }
2181 
2182   if (!t->opcode_modifier.d)
2183     {
2184 mismatch:
2185       if (!match)
2186 	i.error = operand_size_mismatch;
2187       return match;
2188     }
2189 
2190   /* Check reverse.  */
2191   gas_assert (i.operands >= 2 && i.operands <= 3);
2192 
2193   for (j = 0; j < i.operands; j++)
2194     {
2195       unsigned int given = i.operands - j - 1;
2196 
2197       if (t->operand_types[j].bitfield.class == Reg
2198 	  && !match_operand_size (t, j, given))
2199 	goto mismatch;
2200 
2201       if (t->operand_types[j].bitfield.class == RegSIMD
2202 	  && !match_simd_size (t, j, given))
2203 	goto mismatch;
2204 
2205       if (t->operand_types[j].bitfield.instance == Accum
2206 	  && (!match_operand_size (t, j, given)
2207 	      || !match_simd_size (t, j, given)))
2208 	goto mismatch;
2209 
2210       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2211 	goto mismatch;
2212     }
2213 
2214   return match | MATCH_REVERSE;
2215 }
2216 
2217 static INLINE int
2218 operand_type_match (i386_operand_type overlap,
2219 		    i386_operand_type given)
2220 {
2221   i386_operand_type temp = overlap;
2222 
2223   temp.bitfield.unspecified = 0;
2224   temp.bitfield.byte = 0;
2225   temp.bitfield.word = 0;
2226   temp.bitfield.dword = 0;
2227   temp.bitfield.fword = 0;
2228   temp.bitfield.qword = 0;
2229   temp.bitfield.tbyte = 0;
2230   temp.bitfield.xmmword = 0;
2231   temp.bitfield.ymmword = 0;
2232   temp.bitfield.zmmword = 0;
2233   if (operand_type_all_zero (&temp))
2234     goto mismatch;
2235 
2236   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2237     return 1;
2238 
2239 mismatch:
2240   i.error = operand_type_mismatch;
2241   return 0;
2242 }
2243 
2244 /* If given types g0 and g1 are registers they must be of the same type
2245    unless the expected operand type register overlap is null.
2246    Memory operand size of certain SIMD instructions is also being checked
2247    here.  */
2248 
2249 static INLINE int
2250 operand_type_register_match (i386_operand_type g0,
2251 			     i386_operand_type t0,
2252 			     i386_operand_type g1,
2253 			     i386_operand_type t1)
2254 {
2255   if (g0.bitfield.class != Reg
2256       && g0.bitfield.class != RegSIMD
2257       && (!operand_type_check (g0, anymem)
2258 	  || g0.bitfield.unspecified
2259 	  || t0.bitfield.class != RegSIMD))
2260     return 1;
2261 
2262   if (g1.bitfield.class != Reg
2263       && g1.bitfield.class != RegSIMD
2264       && (!operand_type_check (g1, anymem)
2265 	  || g1.bitfield.unspecified
2266 	  || t1.bitfield.class != RegSIMD))
2267     return 1;
2268 
2269   if (g0.bitfield.byte == g1.bitfield.byte
2270       && g0.bitfield.word == g1.bitfield.word
2271       && g0.bitfield.dword == g1.bitfield.dword
2272       && g0.bitfield.qword == g1.bitfield.qword
2273       && g0.bitfield.xmmword == g1.bitfield.xmmword
2274       && g0.bitfield.ymmword == g1.bitfield.ymmword
2275       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2276     return 1;
2277 
2278   if (!(t0.bitfield.byte & t1.bitfield.byte)
2279       && !(t0.bitfield.word & t1.bitfield.word)
2280       && !(t0.bitfield.dword & t1.bitfield.dword)
2281       && !(t0.bitfield.qword & t1.bitfield.qword)
2282       && !(t0.bitfield.xmmword & t1.bitfield.xmmword)
2283       && !(t0.bitfield.ymmword & t1.bitfield.ymmword)
2284       && !(t0.bitfield.zmmword & t1.bitfield.zmmword))
2285     return 1;
2286 
2287   i.error = register_type_mismatch;
2288 
2289   return 0;
2290 }
2291 
2292 static INLINE unsigned int
2293 register_number (const reg_entry *r)
2294 {
2295   unsigned int nr = r->reg_num;
2296 
2297   if (r->reg_flags & RegRex)
2298     nr += 8;
2299 
2300   if (r->reg_flags & RegVRex)
2301     nr += 16;
2302 
2303   return nr;
2304 }
2305 
2306 static INLINE unsigned int
2307 mode_from_disp_size (i386_operand_type t)
2308 {
2309   if (t.bitfield.disp8)
2310     return 1;
2311   else if (t.bitfield.disp16
2312 	   || t.bitfield.disp32
2313 	   || t.bitfield.disp32s)
2314     return 2;
2315   else
2316     return 0;
2317 }
2318 
2319 static INLINE int
2320 fits_in_signed_byte (addressT num)
2321 {
2322   return num + 0x80 <= 0xff;
2323 }
2324 
2325 static INLINE int
2326 fits_in_unsigned_byte (addressT num)
2327 {
2328   return num <= 0xff;
2329 }
2330 
2331 static INLINE int
2332 fits_in_unsigned_word (addressT num)
2333 {
2334   return num <= 0xffff;
2335 }
2336 
2337 static INLINE int
2338 fits_in_signed_word (addressT num)
2339 {
2340   return num + 0x8000 <= 0xffff;
2341 }
2342 
2343 static INLINE int
2344 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2345 {
2346 #ifndef BFD64
2347   return 1;
2348 #else
2349   return num + 0x80000000 <= 0xffffffff;
2350 #endif
2351 }				/* fits_in_signed_long() */
2352 
2353 static INLINE int
2354 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2355 {
2356 #ifndef BFD64
2357   return 1;
2358 #else
2359   return num <= 0xffffffff;
2360 #endif
2361 }				/* fits_in_unsigned_long() */
2362 
2363 static INLINE int
2364 fits_in_disp8 (offsetT num)
2365 {
2366   int shift = i.memshift;
2367   unsigned int mask;
2368 
2369   if (shift == -1)
2370     abort ();
2371 
2372   mask = (1 << shift) - 1;
2373 
2374   /* Return 0 if NUM isn't properly aligned.  */
2375   if ((num & mask))
2376     return 0;
2377 
2378   /* Check if NUM will fit in 8bit after shift.  */
2379   return fits_in_signed_byte (num >> shift);
2380 }
2381 
2382 static INLINE int
2383 fits_in_imm4 (offsetT num)
2384 {
2385   return (num & 0xf) == num;
2386 }
2387 
2388 static i386_operand_type
2389 smallest_imm_type (offsetT num)
2390 {
2391   i386_operand_type t;
2392 
2393   operand_type_set (&t, 0);
2394   t.bitfield.imm64 = 1;
2395 
2396   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2397     {
2398       /* This code is disabled on the 486 because all the Imm1 forms
2399 	 in the opcode table are slower on the i486.  They're the
2400 	 versions with the implicitly specified single-position
2401 	 displacement, which has another syntax if you really want to
2402 	 use that form.  */
2403       t.bitfield.imm1 = 1;
2404       t.bitfield.imm8 = 1;
2405       t.bitfield.imm8s = 1;
2406       t.bitfield.imm16 = 1;
2407       t.bitfield.imm32 = 1;
2408       t.bitfield.imm32s = 1;
2409     }
2410   else if (fits_in_signed_byte (num))
2411     {
2412       t.bitfield.imm8 = 1;
2413       t.bitfield.imm8s = 1;
2414       t.bitfield.imm16 = 1;
2415       t.bitfield.imm32 = 1;
2416       t.bitfield.imm32s = 1;
2417     }
2418   else if (fits_in_unsigned_byte (num))
2419     {
2420       t.bitfield.imm8 = 1;
2421       t.bitfield.imm16 = 1;
2422       t.bitfield.imm32 = 1;
2423       t.bitfield.imm32s = 1;
2424     }
2425   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2426     {
2427       t.bitfield.imm16 = 1;
2428       t.bitfield.imm32 = 1;
2429       t.bitfield.imm32s = 1;
2430     }
2431   else if (fits_in_signed_long (num))
2432     {
2433       t.bitfield.imm32 = 1;
2434       t.bitfield.imm32s = 1;
2435     }
2436   else if (fits_in_unsigned_long (num))
2437     t.bitfield.imm32 = 1;
2438 
2439   return t;
2440 }
2441 
2442 static offsetT
2443 offset_in_range (offsetT val, int size)
2444 {
2445   addressT mask;
2446 
2447   switch (size)
2448     {
2449     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2450     case 2: mask = ((addressT) 1 << 16) - 1; break;
2451     case 4: mask = ((addressT) 2 << 31) - 1; break;
2452 #ifdef BFD64
2453     case 8: mask = ((addressT) 2 << 63) - 1; break;
2454 #endif
2455     default: abort ();
2456     }
2457 
2458 #ifdef BFD64
2459   /* If BFD64, sign extend val for 32bit address mode.  */
2460   if (flag_code != CODE_64BIT
2461       || i.prefix[ADDR_PREFIX])
2462     if ((val & ~(((addressT) 2 << 31) - 1)) == 0)
2463       val = (val ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2464 #endif
2465 
2466   if ((val & ~mask) != 0 && (val & ~mask) != ~mask)
2467     {
2468       char buf1[40], buf2[40];
2469 
2470       sprint_value (buf1, val);
2471       sprint_value (buf2, val & mask);
2472       as_warn (_("%s shortened to %s"), buf1, buf2);
2473     }
2474   return val & mask;
2475 }
2476 
/* Result codes of add_prefix.  PREFIX_EXIST must stay 0: add_prefix
   tests its result for being nonzero to tell success from failure.  */
enum PREFIX_GROUP
{
  PREFIX_EXIST = 0,
  PREFIX_LOCK,
  PREFIX_REP,
  PREFIX_DS,
  PREFIX_OTHER
};
2485 
2486 /* Returns
2487    a. PREFIX_EXIST if attempting to add a prefix where one from the
2488    same class already exists.
2489    b. PREFIX_LOCK if lock prefix is added.
2490    c. PREFIX_REP if rep/repne prefix is added.
2491    d. PREFIX_DS if ds prefix is added.
2492    e. PREFIX_OTHER if other prefix is added.
2493  */
2494 
2495 static enum PREFIX_GROUP
2496 add_prefix (unsigned int prefix)
2497 {
2498   enum PREFIX_GROUP ret = PREFIX_OTHER;
2499   unsigned int q;
2500 
2501   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2502       && flag_code == CODE_64BIT)
2503     {
2504       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2505 	  || (i.prefix[REX_PREFIX] & prefix & REX_R)
2506 	  || (i.prefix[REX_PREFIX] & prefix & REX_X)
2507 	  || (i.prefix[REX_PREFIX] & prefix & REX_B))
2508 	ret = PREFIX_EXIST;
2509       q = REX_PREFIX;
2510     }
2511   else
2512     {
2513       switch (prefix)
2514 	{
2515 	default:
2516 	  abort ();
2517 
2518 	case DS_PREFIX_OPCODE:
2519 	  ret = PREFIX_DS;
2520 	  /* Fall through.  */
2521 	case CS_PREFIX_OPCODE:
2522 	case ES_PREFIX_OPCODE:
2523 	case FS_PREFIX_OPCODE:
2524 	case GS_PREFIX_OPCODE:
2525 	case SS_PREFIX_OPCODE:
2526 	  q = SEG_PREFIX;
2527 	  break;
2528 
2529 	case REPNE_PREFIX_OPCODE:
2530 	case REPE_PREFIX_OPCODE:
2531 	  q = REP_PREFIX;
2532 	  ret = PREFIX_REP;
2533 	  break;
2534 
2535 	case LOCK_PREFIX_OPCODE:
2536 	  q = LOCK_PREFIX;
2537 	  ret = PREFIX_LOCK;
2538 	  break;
2539 
2540 	case FWAIT_OPCODE:
2541 	  q = WAIT_PREFIX;
2542 	  break;
2543 
2544 	case ADDR_PREFIX_OPCODE:
2545 	  q = ADDR_PREFIX;
2546 	  break;
2547 
2548 	case DATA_PREFIX_OPCODE:
2549 	  q = DATA_PREFIX;
2550 	  break;
2551 	}
2552       if (i.prefix[q] != 0)
2553 	ret = PREFIX_EXIST;
2554     }
2555 
2556   if (ret)
2557     {
2558       if (!i.prefix[q])
2559 	++i.prefixes;
2560       i.prefix[q] |= prefix;
2561     }
2562   else
2563     as_bad (_("same type of prefix used twice"));
2564 
2565   return ret;
2566 }
2567 
2568 static void
2569 update_code_flag (int value, int check)
2570 {
2571   PRINTF_LIKE ((*as_error));
2572 
2573   flag_code = (enum flag_code) value;
2574   if (flag_code == CODE_64BIT)
2575     {
2576       cpu_arch_flags.bitfield.cpu64 = 1;
2577       cpu_arch_flags.bitfield.cpuno64 = 0;
2578     }
2579   else
2580     {
2581       cpu_arch_flags.bitfield.cpu64 = 0;
2582       cpu_arch_flags.bitfield.cpuno64 = 1;
2583     }
2584   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2585     {
2586       if (check)
2587 	as_error = as_fatal;
2588       else
2589 	as_error = as_bad;
2590       (*as_error) (_("64bit mode not supported on `%s'."),
2591 		   cpu_arch_name ? cpu_arch_name : default_arch);
2592     }
2593   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2594     {
2595       if (check)
2596 	as_error = as_fatal;
2597       else
2598 	as_error = as_bad;
2599       (*as_error) (_("32bit mode not supported on `%s'."),
2600 		   cpu_arch_name ? cpu_arch_name : default_arch);
2601     }
2602   stackop_size = '\0';
2603 }
2604 
/* Switch the code mode to VALUE by way of update_code_flag, with
   CHECK being zero, i.e. an unsupported mode is reported through
   as_bad rather than as_fatal.  */

static void
set_code_flag (int value)
{
  update_code_flag (value, 0);
}
2610 
/* Switch to 16-bit code; NEW_CODE_FLAG must be CODE_16BIT, anything
   else aborts.  Unlike update_code_flag, this sets stackop_size to
   LONG_MNEM_SUFFIX instead of clearing it.  */

static void
set_16bit_gcc_code_flag (int new_code_flag)
{
  flag_code = (enum flag_code) new_code_flag;
  if (flag_code != CODE_16BIT)
    abort ();
  /* Not 64-bit mode: mirror what update_code_flag does for the
     cpu64/cpuno64 bits.  */
  cpu_arch_flags.bitfield.cpu64 = 0;
  cpu_arch_flags.bitfield.cpuno64 = 1;
  stackop_size = LONG_MNEM_SUFFIX;
}
2621 
2622 static void
2623 set_intel_syntax (int syntax_flag)
2624 {
2625   /* Find out if register prefixing is specified.  */
2626   int ask_naked_reg = 0;
2627 
2628   SKIP_WHITESPACE ();
2629   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2630     {
2631       char *string;
2632       int e = get_symbol_name (&string);
2633 
2634       if (strcmp (string, "prefix") == 0)
2635 	ask_naked_reg = 1;
2636       else if (strcmp (string, "noprefix") == 0)
2637 	ask_naked_reg = -1;
2638       else
2639 	as_bad (_("bad argument to syntax directive."));
2640       (void) restore_line_pointer (e);
2641     }
2642   demand_empty_rest_of_line ();
2643 
2644   intel_syntax = syntax_flag;
2645 
2646   if (ask_naked_reg == 0)
2647     allow_naked_reg = (intel_syntax
2648 		       && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2649   else
2650     allow_naked_reg = (ask_naked_reg < 0);
2651 
2652   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2653 
2654   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2655   identifier_chars['$'] = intel_syntax ? '$' : 0;
2656   register_prefix = allow_naked_reg ? "" : "%";
2657 }
2658 
/* Set intel_mnemonic to MNEMONIC_FLAG.  */

static void
set_intel_mnemonic (int mnemonic_flag)
{
  intel_mnemonic = mnemonic_flag;
}
2664 
/* Set allow_index_reg to FLAG.  */

static void
set_allow_index_reg (int flag)
{
  allow_index_reg = flag;
}
2670 
2671 static void
2672 set_check (int what)
2673 {
2674   enum check_kind *kind;
2675   const char *str;
2676 
2677   if (what)
2678     {
2679       kind = &operand_check;
2680       str = "operand";
2681     }
2682   else
2683     {
2684       kind = &sse_check;
2685       str = "sse";
2686     }
2687 
2688   SKIP_WHITESPACE ();
2689 
2690   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2691     {
2692       char *string;
2693       int e = get_symbol_name (&string);
2694 
2695       if (strcmp (string, "none") == 0)
2696 	*kind = check_none;
2697       else if (strcmp (string, "warning") == 0)
2698 	*kind = check_warning;
2699       else if (strcmp (string, "error") == 0)
2700 	*kind = check_error;
2701       else
2702 	as_bad (_("bad argument to %s_check directive."), str);
2703       (void) restore_line_pointer (e);
2704     }
2705   else
2706     as_bad (_("missing argument for %s_check directive"), str);
2707 
2708   demand_empty_rest_of_line ();
2709 }
2710 
/* Check whether architecture NAME, with CPU flags NEW_FLAG, may be
   selected for the ELF machine being targeted.  Does nothing unless
   built with ELF support and the output is ELF.

   NOTE(review): as written, the final as_bad looks unreachable.  To
   get past the first test the machine code has to be EM_IAMCU, in
   which case the second test (machine code != EM_L1OM) returns.
   Confirm the intent before relying on this function to diagnose
   anything.  */

static void
check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
			   i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  /* Architecture name to mention in the diagnostic; determined on the
     first call and then kept.  */
  static const char *arch;

  /* Intel L1OM is only supported on ELF.  */
  if (!IS_ELF)
    return;

  if (!arch)
    {
      /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
	 use default_arch.  */
      arch = cpu_arch_name;
      if (!arch)
	arch = default_arch;
    }

  /* If we are targeting Intel MCU, we must enable it.  */
  if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_IAMCU
      || new_flag.bitfield.cpuiamcu)
    return;

  /* If we are targeting Intel L1OM, we must enable it.  */
  if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_L1OM
      || new_flag.bitfield.cpul1om)
    return;

  /* If we are targeting Intel K1OM, we must enable it.  */
  if (get_elf_backend_data (stdoutput)->elf_machine_code != EM_K1OM
      || new_flag.bitfield.cpuk1om)
    return;

  as_bad (_("`%s' is not supported on `%s'"), name, arch);
#endif
}
2749 
/* Handle an architecture selection directive.  The name read from the
   input line is looked up in cpu_arch:
   - a name not starting with '.' replaces the current architecture
     (cpu_arch_name, cpu_arch_flags, cpu_arch_isa and, unless a tuning
     was set explicitly, cpu_arch_tune);
   - a name starting with '.' has its flags ORed into the current
     ones.
   A '.'-name not found in cpu_arch is looked up, without the dot, in
   cpu_noarch, and on a hit its flags are removed from the current
   ones.  Anything else is diagnosed.

   Except on the two '.'-name paths, which return early, an optional
   ",jumps" / ",nojumps" modifier is parsed afterwards and
   no_cond_jump_promotion is set accordingly.  DUMMY is unused.  */

static void
set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
{
  SKIP_WHITESPACE ();

  if (!is_end_of_line[(unsigned char) *input_line_pointer])
    {
      char *string;
      int e = get_symbol_name (&string);
      unsigned int j;
      i386_cpu_flags flags;

      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
	{
	  if (strcmp (string, cpu_arch[j].name) == 0)
	    {
	      check_cpu_arch_compatible (string, cpu_arch[j].flags);

	      if (*string != '.')
		{
		  /* A base architecture: start over from its flags.  */
		  cpu_arch_name = cpu_arch[j].name;
		  cpu_sub_arch_name = NULL;
		  cpu_arch_flags = cpu_arch[j].flags;
		  /* Re-establish the mode bits, which depend on
		     flag_code rather than on the architecture.  */
		  if (flag_code == CODE_64BIT)
		    {
		      cpu_arch_flags.bitfield.cpu64 = 1;
		      cpu_arch_flags.bitfield.cpuno64 = 0;
		    }
		  else
		    {
		      cpu_arch_flags.bitfield.cpu64 = 0;
		      cpu_arch_flags.bitfield.cpuno64 = 1;
		    }
		  cpu_arch_isa = cpu_arch[j].type;
		  cpu_arch_isa_flags = cpu_arch[j].flags;
		  if (!cpu_arch_tune_set)
		    {
		      cpu_arch_tune = cpu_arch_isa;
		      cpu_arch_tune_flags = cpu_arch_isa_flags;
		    }
		  /* Leave the loop with j in range; the modifier
		     parsing at the end of the function follows.  */
		  break;
		}

	      /* An extension: add its flags to the current set.  */
	      flags = cpu_flags_or (cpu_arch_flags,
				    cpu_arch[j].flags);

	      if (!cpu_flags_equal (&flags, &cpu_arch_flags))
		{
		  /* Something changed: append the extension's name to
		     the sub-arch name.  */
		  if (cpu_sub_arch_name)
		    {
		      char *name = cpu_sub_arch_name;
		      cpu_sub_arch_name = concat (name,
						  cpu_arch[j].name,
						  (const char *) NULL);
		      free (name);
		    }
		  else
		    cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
		  cpu_arch_flags = flags;
		  cpu_arch_isa_flags = flags;
		}
	      else
		cpu_arch_isa_flags
		  = cpu_flags_or (cpu_arch_isa_flags,
				  cpu_arch[j].flags);
	      (void) restore_line_pointer (e);
	      demand_empty_rest_of_line ();
	      return;
	    }
	}

      if (*string == '.' && j >= ARRAY_SIZE (cpu_arch))
	{
	  /* Disable an ISA extension.  */
	  for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
	    if (strcmp (string + 1, cpu_noarch [j].name) == 0)
	      {
		flags = cpu_flags_and_not (cpu_arch_flags,
					   cpu_noarch[j].flags);
		if (!cpu_flags_equal (&flags, &cpu_arch_flags))
		  {
		    if (cpu_sub_arch_name)
		      {
			char *name = cpu_sub_arch_name;
			cpu_sub_arch_name = concat (name, string,
						    (const char *) NULL);
			free (name);
		      }
		    else
		      cpu_sub_arch_name = xstrdup (string);
		    cpu_arch_flags = flags;
		    cpu_arch_isa_flags = flags;
		  }
		(void) restore_line_pointer (e);
		demand_empty_rest_of_line ();
		return;
	      }

	  /* No hit in cpu_noarch either: put j back out of range for
	     cpu_arch, so that the error below is issued.  */
	  j = ARRAY_SIZE (cpu_arch);
	}

      if (j >= ARRAY_SIZE (cpu_arch))
	as_bad (_("no such architecture: `%s'"), string);

      /* Put back the character saved by get_symbol_name.  */
      *input_line_pointer = e;
    }
  else
    as_bad (_("missing cpu architecture"));

  /* Conditional jump promotion is on unless ",nojumps" follows.  */
  no_cond_jump_promotion = 0;
  if (*input_line_pointer == ','
      && !is_end_of_line[(unsigned char) input_line_pointer[1]])
    {
      char *string;
      char e;

      ++input_line_pointer;
      e = get_symbol_name (&string);

      if (strcmp (string, "nojumps") == 0)
	no_cond_jump_promotion = 1;
      else if (strcmp (string, "jumps") == 0)
	;
      else
	as_bad (_("no such architecture modifier: `%s'"), string);

      (void) restore_line_pointer (e);
    }

  demand_empty_rest_of_line ();
}
2881 
2882 enum bfd_architecture
2883 i386_arch (void)
2884 {
2885   if (cpu_arch_isa == PROCESSOR_L1OM)
2886     {
2887       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2888 	  || flag_code != CODE_64BIT)
2889 	as_fatal (_("Intel L1OM is 64bit ELF only"));
2890       return bfd_arch_l1om;
2891     }
2892   else if (cpu_arch_isa == PROCESSOR_K1OM)
2893     {
2894       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2895 	  || flag_code != CODE_64BIT)
2896 	as_fatal (_("Intel K1OM is 64bit ELF only"));
2897       return bfd_arch_k1om;
2898     }
2899   else if (cpu_arch_isa == PROCESSOR_IAMCU)
2900     {
2901       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2902 	  || flag_code == CODE_64BIT)
2903 	as_fatal (_("Intel MCU is 32bit ELF only"));
2904       return bfd_arch_iamcu;
2905     }
2906   else
2907     return bfd_arch_i386;
2908 }
2909 
2910 unsigned long
2911 i386_mach (void)
2912 {
2913   if (!strncmp (default_arch, "x86_64", 6))
2914     {
2915       if (cpu_arch_isa == PROCESSOR_L1OM)
2916 	{
2917 	  if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2918 	      || default_arch[6] != '\0')
2919 	    as_fatal (_("Intel L1OM is 64bit ELF only"));
2920 	  return bfd_mach_l1om;
2921 	}
2922       else if (cpu_arch_isa == PROCESSOR_K1OM)
2923 	{
2924 	  if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2925 	      || default_arch[6] != '\0')
2926 	    as_fatal (_("Intel K1OM is 64bit ELF only"));
2927 	  return bfd_mach_k1om;
2928 	}
2929       else if (default_arch[6] == '\0')
2930 	return bfd_mach_x86_64;
2931       else
2932 	return bfd_mach_x64_32;
2933     }
2934   else if (!strcmp (default_arch, "i386")
2935 	   || !strcmp (default_arch, "iamcu"))
2936     {
2937       if (cpu_arch_isa == PROCESSOR_IAMCU)
2938 	{
2939 	  if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2940 	    as_fatal (_("Intel MCU is 32bit ELF only"));
2941 	  return bfd_mach_i386_iamcu;
2942 	}
2943       else
2944 	return bfd_mach_i386_i386;
2945     }
2946   else
2947     as_fatal (_("unknown architecture"));
2948 }
2949 
2950 void
2951 md_begin (void)
2952 {
2953   const char *hash_err;
2954 
2955   /* Support pseudo prefixes like {disp32}.  */
2956   lex_type ['{'] = LEX_BEGIN_NAME;
2957 
2958   /* Initialize op_hash hash table.  */
2959   op_hash = hash_new ();
2960 
2961   {
2962     const insn_template *optab;
2963     templates *core_optab;
2964 
2965     /* Setup for loop.  */
2966     optab = i386_optab;
2967     core_optab = XNEW (templates);
2968     core_optab->start = optab;
2969 
2970     while (1)
2971       {
2972 	++optab;
2973 	if (optab->name == NULL
2974 	    || strcmp (optab->name, (optab - 1)->name) != 0)
2975 	  {
2976 	    /* different name --> ship out current template list;
2977 	       add to hash table; & begin anew.  */
2978 	    core_optab->end = optab;
2979 	    hash_err = hash_insert (op_hash,
2980 				    (optab - 1)->name,
2981 				    (void *) core_optab);
2982 	    if (hash_err)
2983 	      {
2984 		as_fatal (_("can't hash %s: %s"),
2985 			  (optab - 1)->name,
2986 			  hash_err);
2987 	      }
2988 	    if (optab->name == NULL)
2989 	      break;
2990 	    core_optab = XNEW (templates);
2991 	    core_optab->start = optab;
2992 	  }
2993       }
2994   }
2995 
2996   /* Initialize reg_hash hash table.  */
2997   reg_hash = hash_new ();
2998   {
2999     const reg_entry *regtab;
3000     unsigned int regtab_size = i386_regtab_size;
3001 
3002     for (regtab = i386_regtab; regtab_size--; regtab++)
3003       {
3004 	hash_err = hash_insert (reg_hash, regtab->reg_name, (void *) regtab);
3005 	if (hash_err)
3006 	  as_fatal (_("can't hash %s: %s"),
3007 		    regtab->reg_name,
3008 		    hash_err);
3009       }
3010   }
3011 
3012   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3013   {
3014     int c;
3015     char *p;
3016 
3017     for (c = 0; c < 256; c++)
3018       {
3019 	if (ISDIGIT (c))
3020 	  {
3021 	    digit_chars[c] = c;
3022 	    mnemonic_chars[c] = c;
3023 	    register_chars[c] = c;
3024 	    operand_chars[c] = c;
3025 	  }
3026 	else if (ISLOWER (c))
3027 	  {
3028 	    mnemonic_chars[c] = c;
3029 	    register_chars[c] = c;
3030 	    operand_chars[c] = c;
3031 	  }
3032 	else if (ISUPPER (c))
3033 	  {
3034 	    mnemonic_chars[c] = TOLOWER (c);
3035 	    register_chars[c] = mnemonic_chars[c];
3036 	    operand_chars[c] = c;
3037 	  }
3038 	else if (c == '{' || c == '}')
3039 	  {
3040 	    mnemonic_chars[c] = c;
3041 	    operand_chars[c] = c;
3042 	  }
3043 
3044 	if (ISALPHA (c) || ISDIGIT (c))
3045 	  identifier_chars[c] = c;
3046 	else if (c >= 128)
3047 	  {
3048 	    identifier_chars[c] = c;
3049 	    operand_chars[c] = c;
3050 	  }
3051       }
3052 
3053 #ifdef LEX_AT
3054     identifier_chars['@'] = '@';
3055 #endif
3056 #ifdef LEX_QM
3057     identifier_chars['?'] = '?';
3058     operand_chars['?'] = '?';
3059 #endif
3060     digit_chars['-'] = '-';
3061     mnemonic_chars['_'] = '_';
3062     mnemonic_chars['-'] = '-';
3063     mnemonic_chars['.'] = '.';
3064     identifier_chars['_'] = '_';
3065     identifier_chars['.'] = '.';
3066 
3067     for (p = operand_special_chars; *p != '\0'; p++)
3068       operand_chars[(unsigned char) *p] = *p;
3069   }
3070 
3071   if (flag_code == CODE_64BIT)
3072     {
3073 #if defined (OBJ_COFF) && defined (TE_PE)
3074       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3075 				  ? 32 : 16);
3076 #else
3077       x86_dwarf2_return_column = 16;
3078 #endif
3079       x86_cie_data_alignment = -8;
3080     }
3081   else
3082     {
3083       x86_dwarf2_return_column = 8;
3084       x86_cie_data_alignment = -4;
3085     }
3086 
3087   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3088      can be turned into BRANCH_PREFIX frag.  */
3089   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3090     abort ();
3091 }
3092 
/* Print usage statistics for the opcode hash table and then the
   register hash table to FILE, labelled "i386 opcode" and
   "i386 register" respectively.  */

void
i386_print_statistics (FILE *file)
{
  hash_print_statistics (file, "i386 opcode", op_hash);
  hash_print_statistics (file, "i386 register", reg_hash);
}
3099 
3100 #ifdef DEBUG386
3101 
3102 /* Debugging routines for md_assemble.  */
3103 static void pte (insn_template *);
3104 static void pt (i386_operand_type);
3105 static void pe (expressionS *);
3106 static void ps (symbolS *);
3107 
3108 static void
3109 pi (const char *line, i386_insn *x)
3110 {
3111   unsigned int j;
3112 
3113   fprintf (stdout, "%s: template ", line);
3114   pte (&x->tm);
3115   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3116 	   x->base_reg ? x->base_reg->reg_name : "none",
3117 	   x->index_reg ? x->index_reg->reg_name : "none",
3118 	   x->log2_scale_factor);
3119   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3120 	   x->rm.mode, x->rm.reg, x->rm.regmem);
3121   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3122 	   x->sib.base, x->sib.index, x->sib.scale);
3123   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3124 	   (x->rex & REX_W) != 0,
3125 	   (x->rex & REX_R) != 0,
3126 	   (x->rex & REX_X) != 0,
3127 	   (x->rex & REX_B) != 0);
3128   for (j = 0; j < x->operands; j++)
3129     {
3130       fprintf (stdout, "    #%d:  ", j + 1);
3131       pt (x->types[j]);
3132       fprintf (stdout, "\n");
3133       if (x->types[j].bitfield.class == Reg
3134 	  || x->types[j].bitfield.class == RegMMX
3135 	  || x->types[j].bitfield.class == RegSIMD
3136 	  || x->types[j].bitfield.class == SReg
3137 	  || x->types[j].bitfield.class == RegCR
3138 	  || x->types[j].bitfield.class == RegDR
3139 	  || x->types[j].bitfield.class == RegTR)
3140 	fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3141       if (operand_type_check (x->types[j], imm))
3142 	pe (x->op[j].imms);
3143       if (operand_type_check (x->types[j], disp))
3144 	pe (x->op[j].disps);
3145     }
3146 }
3147 
3148 static void
3149 pte (insn_template *t)
3150 {
3151   unsigned int j;
3152   fprintf (stdout, " %d operands ", t->operands);
3153   fprintf (stdout, "opcode %x ", t->base_opcode);
3154   if (t->extension_opcode != None)
3155     fprintf (stdout, "ext %x ", t->extension_opcode);
3156   if (t->opcode_modifier.d)
3157     fprintf (stdout, "D");
3158   if (t->opcode_modifier.w)
3159     fprintf (stdout, "W");
3160   fprintf (stdout, "\n");
3161   for (j = 0; j < t->operands; j++)
3162     {
3163       fprintf (stdout, "    #%d type ", j + 1);
3164       pt (t->operand_types[j]);
3165       fprintf (stdout, "\n");
3166     }
3167 }
3168 
3169 static void
3170 pe (expressionS *e)
3171 {
3172   fprintf (stdout, "    operation     %d\n", e->X_op);
3173   fprintf (stdout, "    add_number    %ld (%lx)\n",
3174 	   (long) e->X_add_number, (long) e->X_add_number);
3175   if (e->X_add_symbol)
3176     {
3177       fprintf (stdout, "    add_symbol    ");
3178       ps (e->X_add_symbol);
3179       fprintf (stdout, "\n");
3180     }
3181   if (e->X_op_symbol)
3182     {
3183       fprintf (stdout, "    op_symbol    ");
3184       ps (e->X_op_symbol);
3185       fprintf (stdout, "\n");
3186     }
3187 }
3188 
3189 static void
3190 ps (symbolS *s)
3191 {
3192   fprintf (stdout, "%s type %s%s",
3193 	   S_GET_NAME (s),
3194 	   S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3195 	   segment_name (S_GET_SEGMENT (s)));
3196 }
3197 
3198 static struct type_name
3199   {
3200     i386_operand_type mask;
3201     const char *name;
3202   }
3203 const type_names[] =
3204 {
3205   { OPERAND_TYPE_REG8, "r8" },
3206   { OPERAND_TYPE_REG16, "r16" },
3207   { OPERAND_TYPE_REG32, "r32" },
3208   { OPERAND_TYPE_REG64, "r64" },
3209   { OPERAND_TYPE_ACC8, "acc8" },
3210   { OPERAND_TYPE_ACC16, "acc16" },
3211   { OPERAND_TYPE_ACC32, "acc32" },
3212   { OPERAND_TYPE_ACC64, "acc64" },
3213   { OPERAND_TYPE_IMM8, "i8" },
3214   { OPERAND_TYPE_IMM8, "i8s" },
3215   { OPERAND_TYPE_IMM16, "i16" },
3216   { OPERAND_TYPE_IMM32, "i32" },
3217   { OPERAND_TYPE_IMM32S, "i32s" },
3218   { OPERAND_TYPE_IMM64, "i64" },
3219   { OPERAND_TYPE_IMM1, "i1" },
3220   { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3221   { OPERAND_TYPE_DISP8, "d8" },
3222   { OPERAND_TYPE_DISP16, "d16" },
3223   { OPERAND_TYPE_DISP32, "d32" },
3224   { OPERAND_TYPE_DISP32S, "d32s" },
3225   { OPERAND_TYPE_DISP64, "d64" },
3226   { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3227   { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3228   { OPERAND_TYPE_CONTROL, "control reg" },
3229   { OPERAND_TYPE_TEST, "test reg" },
3230   { OPERAND_TYPE_DEBUG, "debug reg" },
3231   { OPERAND_TYPE_FLOATREG, "FReg" },
3232   { OPERAND_TYPE_FLOATACC, "FAcc" },
3233   { OPERAND_TYPE_SREG, "SReg" },
3234   { OPERAND_TYPE_REGMMX, "rMMX" },
3235   { OPERAND_TYPE_REGXMM, "rXMM" },
3236   { OPERAND_TYPE_REGYMM, "rYMM" },
3237   { OPERAND_TYPE_REGZMM, "rZMM" },
3238   { OPERAND_TYPE_REGMASK, "Mask reg" },
3239 };
3240 
3241 static void
3242 pt (i386_operand_type t)
3243 {
3244   unsigned int j;
3245   i386_operand_type a;
3246 
3247   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3248     {
3249       a = operand_type_and (t, type_names[j].mask);
3250       if (operand_type_equal (&a, &type_names[j].mask))
3251 	fprintf (stdout, "%s, ",  type_names[j].name);
3252     }
3253   fflush (stdout);
3254 }
3255 
3256 #endif /* DEBUG386 */
3257 
3258 static bfd_reloc_code_real_type
3259 reloc (unsigned int size,
3260        int pcrel,
3261        int sign,
3262        bfd_reloc_code_real_type other)
3263 {
3264   if (other != NO_RELOC)
3265     {
3266       reloc_howto_type *rel;
3267 
3268       if (size == 8)
3269 	switch (other)
3270 	  {
3271 	  case BFD_RELOC_X86_64_GOT32:
3272 	    return BFD_RELOC_X86_64_GOT64;
3273 	    break;
3274 	  case BFD_RELOC_X86_64_GOTPLT64:
3275 	    return BFD_RELOC_X86_64_GOTPLT64;
3276 	    break;
3277 	  case BFD_RELOC_X86_64_PLTOFF64:
3278 	    return BFD_RELOC_X86_64_PLTOFF64;
3279 	    break;
3280 	  case BFD_RELOC_X86_64_GOTPC32:
3281 	    other = BFD_RELOC_X86_64_GOTPC64;
3282 	    break;
3283 	  case BFD_RELOC_X86_64_GOTPCREL:
3284 	    other = BFD_RELOC_X86_64_GOTPCREL64;
3285 	    break;
3286 	  case BFD_RELOC_X86_64_TPOFF32:
3287 	    other = BFD_RELOC_X86_64_TPOFF64;
3288 	    break;
3289 	  case BFD_RELOC_X86_64_DTPOFF32:
3290 	    other = BFD_RELOC_X86_64_DTPOFF64;
3291 	    break;
3292 	  default:
3293 	    break;
3294 	  }
3295 
3296 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3297       if (other == BFD_RELOC_SIZE32)
3298 	{
3299 	  if (size == 8)
3300 	    other = BFD_RELOC_SIZE64;
3301 	  if (pcrel)
3302 	    {
3303 	      as_bad (_("there are no pc-relative size relocations"));
3304 	      return NO_RELOC;
3305 	    }
3306 	}
3307 #endif
3308 
3309       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3310       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3311 	sign = -1;
3312 
3313       rel = bfd_reloc_type_lookup (stdoutput, other);
3314       if (!rel)
3315 	as_bad (_("unknown relocation (%u)"), other);
3316       else if (size != bfd_get_reloc_size (rel))
3317 	as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3318 		bfd_get_reloc_size (rel),
3319 		size);
3320       else if (pcrel && !rel->pc_relative)
3321 	as_bad (_("non-pc-relative relocation for pc-relative field"));
3322       else if ((rel->complain_on_overflow == complain_overflow_signed
3323 		&& !sign)
3324 	       || (rel->complain_on_overflow == complain_overflow_unsigned
3325 		   && sign > 0))
3326 	as_bad (_("relocated field and relocation type differ in signedness"));
3327       else
3328 	return other;
3329       return NO_RELOC;
3330     }
3331 
3332   if (pcrel)
3333     {
3334       if (!sign)
3335 	as_bad (_("there are no unsigned pc-relative relocations"));
3336       switch (size)
3337 	{
3338 	case 1: return BFD_RELOC_8_PCREL;
3339 	case 2: return BFD_RELOC_16_PCREL;
3340 	case 4: return BFD_RELOC_32_PCREL;
3341 	case 8: return BFD_RELOC_64_PCREL;
3342 	}
3343       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3344     }
3345   else
3346     {
3347       if (sign > 0)
3348 	switch (size)
3349 	  {
3350 	  case 4: return BFD_RELOC_X86_64_32S;
3351 	  }
3352       else
3353 	switch (size)
3354 	  {
3355 	  case 1: return BFD_RELOC_8;
3356 	  case 2: return BFD_RELOC_16;
3357 	  case 4: return BFD_RELOC_32;
3358 	  case 8: return BFD_RELOC_64;
3359 	  }
3360       as_bad (_("cannot do %s %u byte relocation"),
3361 	      sign > 0 ? "signed" : "unsigned", size);
3362     }
3363 
3364   return NO_RELOC;
3365 }
3366 
3367 /* Here we decide which fixups can be adjusted to make them relative to
3368    the beginning of the section instead of the symbol.  Basically we need
3369    to make sure that the dynamic relocations are done correctly, so in
3370    some cases we force the original symbol to be used.  */
3371 
3372 int
3373 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3374 {
3375 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3376   if (!IS_ELF)
3377     return 1;
3378 
3379   /* Don't adjust pc-relative references to merge sections in 64-bit
3380      mode.  */
3381   if (use_rela_relocations
3382       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3383       && fixP->fx_pcrel)
3384     return 0;
3385 
3386   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3387      and changed later by validate_fix.  */
3388   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3389       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3390     return 0;
3391 
3392   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3393      for size relocations.  */
3394   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3395       || fixP->fx_r_type == BFD_RELOC_SIZE64
3396       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3397       || fixP->fx_r_type == BFD_RELOC_386_PLT32
3398       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3399       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3400       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3401       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3402       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3403       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3404       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3405       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3406       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3407       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3408       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3409       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3410       || fixP->fx_r_type == BFD_RELOC_X86_64_PLT32
3411       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3412       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3413       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3414       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3415       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3416       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3417       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3418       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3419       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3420       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3421       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3422       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3423       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3424       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3425       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3426       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3427     return 0;
3428 #endif
3429   return 1;
3430 }
3431 
/* Classify MNEMONIC for Intel-syntax operand size handling.

   Returns 0 for a non-math opcode, 1 for an ordinary floating point
   opcode, 2 for an integer floating point opcode (fi...), and 3 for a
   control/state opcode (fldcw, fldenv, non-waiting fn... forms,
   frstor, fsave, fstcw, fstenv, fstsw, ...).

   The result is meaningful only for opcodes with (memory) operands,
   so opcodes without operands may be classified improperly here (for
   better performance and smaller code).  */

static int
intel_float_operand (const char *mnemonic)
{
  enum { NON_MATH = 0, FP_OP = 1, INT_OP = 2, CONTROL_OP = 3 };
  const char *rest;

  if (mnemonic[0] != 'f')
    return NON_MATH;

  /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
     the fs segment override prefix not currently handled because no
     call path can make opcodes without operands get here.

     Each test below only looks one character further once the
     previous character has matched, so the string terminator is never
     passed.  */
  rest = mnemonic + 1;

  if (rest[0] == 'i')
    return INT_OP;

  if (rest[0] == 'l')
    {
      /* fldcw/fldenv */
      if (rest[1] == 'd' && (rest[2] == 'c' || rest[2] == 'e'))
	return CONTROL_OP;
    }
  else if (rest[0] == 'n')
    {
      /* Non-waiting control op; anything but fnop.  */
      if (rest[1] != 'o')
	return CONTROL_OP;
    }
  else if (rest[0] == 'r')
    {
      /* frstor/frstpm */
      if (rest[1] == 's')
	return CONTROL_OP;
    }
  else if (rest[0] == 's')
    {
      /* fsave */
      if (rest[1] == 'a')
	return CONTROL_OP;
      /* fstcw, fstdw, fstenv, fsts[gw] */
      if (rest[1] == 't'
	  && (rest[2] == 'c' || rest[2] == 'd'
	      || rest[2] == 'e' || rest[2] == 's'))
	return CONTROL_OP;
    }
  else if (rest[0] == 'x')
    {
      /* fxsave/fxrstor are not really math ops.  */
      if (rest[1] == 'r' || rest[1] == 's')
	return NON_MATH;
    }

  return FP_OP;
}
3484 
/* Build the VEX (or XOP) prefix for the current instruction `i'.

   The result is stored in i.vex.bytes with its size in i.vex.length
   (2 or 3).  To make the 2-byte form usable, operands may first be
   swapped, which rewrites i.types, i.op, i.rex, i.rm and possibly
   i.tm / i.vex.register_specifier.

   T is the matched template; its operand types are used to derive
   the vector length, and T[1] is taken as the replacement template
   when the swapped form is the next table entry.

   Aborts on an opcode prefix byte or opcode map that cannot be
   encoded.  */

static void
build_vex_prefix (const insn_template *t)
{
  unsigned int register_specifier;
  unsigned int implied_prefix;
  unsigned int vector_length;
  unsigned int w;

  /* Check register specifier.  The 4-bit field holds the register
     number inverted; all ones when there is no such register.  */
  if (i.vex.register_specifier)
    {
      register_specifier =
	~register_number (i.vex.register_specifier) & 0xf;
      gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
    }
  else
    register_specifier = 0xf;

  /* Use 2-byte VEX prefix by swapping destination and source operand
     if there are more than 1 register operand.  Only REX_B is set on
     entry; after exchanging the ModRM reg and regmem fields it becomes
     REX_R, which does not prevent the 2-byte form below.  */
  if (i.reg_operands > 1
      && i.vec_encoding != vex_encoding_vex3
      && i.dir_encoding == dir_encoding_default
      && i.operands == i.reg_operands
      && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
      && i.tm.opcode_modifier.vexopcode == VEX0F
      && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
      && i.rex == REX_B)
    {
      unsigned int xchg = i.operands - 1;
      union i386_op temp_op;
      i386_operand_type temp_type;

      /* Exchange the first and last operand.  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[0];
      i.types[0] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[0];
      i.op[0] = temp_op;

      gas_assert (i.rm.mode == 3);

      /* Exchange the ModRM register fields to match.  */
      i.rex = REX_R;
      xchg = i.rm.regmem;
      i.rm.regmem = i.rm.reg;
      i.rm.reg = xchg;

      /* Select the opcode for the opposite direction.  */
      if (i.tm.opcode_modifier.d)
	i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
			    ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
      else /* Use the next insn.  */
	i.tm = t[1];
    }

  /* Use 2-byte VEX prefix by swapping commutative source operands if there
     are no memory operands and at least 3 register ones.  */
  if (i.reg_operands >= 3
      && i.vec_encoding != vex_encoding_vex3
      && i.reg_operands == i.operands - i.imm_operands
      && i.tm.opcode_modifier.vex
      && i.tm.opcode_modifier.commutative
      && (i.tm.opcode_modifier.sse2avx || optimize > 1)
      && i.rex == REX_B
      && i.vex.register_specifier
      && !(i.vex.register_specifier->reg_flags & RegRex))
    {
      /* Index of the first register operand (immediates come first).  */
      unsigned int xchg = i.operands - i.reg_operands;
      union i386_op temp_op;
      i386_operand_type temp_type;

      gas_assert (i.tm.opcode_modifier.vexopcode == VEX0F);
      gas_assert (!i.tm.opcode_modifier.sae);
      gas_assert (operand_type_equal (&i.types[i.operands - 2],
                                      &i.types[i.operands - 3]));
      gas_assert (i.rm.mode == 3);

      /* Exchange the two source operands.  */
      temp_type = i.types[xchg];
      i.types[xchg] = i.types[xchg + 1];
      i.types[xchg + 1] = temp_type;
      temp_op = i.op[xchg];
      i.op[xchg] = i.op[xchg + 1];
      i.op[xchg + 1] = temp_op;

      /* The register previously in ModRM.rm (needing REX_B, hence
	 "| 8") moves to the register specifier, and the old specifier
	 register (asserted to be below 8) moves to ModRM.rm, so REX_B
	 is no longer needed.  */
      i.rex = 0;
      xchg = i.rm.regmem | 8;
      i.rm.regmem = ~register_specifier & 0xf;
      gas_assert (!(i.rm.regmem & 8));
      /* NOTE(review): this pointer adjustment presumably relies on
	 consecutively numbered registers being adjacent in the
	 register table -- confirm against the table layout.  */
      i.vex.register_specifier += xchg - i.rm.regmem;
      register_specifier = ~xchg & 0xf;
    }

  /* Determine the vector length bit.  */
  if (i.tm.opcode_modifier.vex == VEXScalar)
    vector_length = avxscalar;
  else if (i.tm.opcode_modifier.vex == VEX256)
    vector_length = 1;
  else
    {
      unsigned int op;

      /* Determine vector length from the last multi-length vector
	 operand.  */
      vector_length = 0;
      for (op = t->operands; op--;)
	if (t->operand_types[op].bitfield.xmmword
	    && t->operand_types[op].bitfield.ymmword
	    && i.types[op].bitfield.ymmword)
	  {
	    vector_length = 1;
	    break;
	  }
    }

  /* The second byte of base_opcode selects the implied legacy prefix,
     encoded as a 2-bit value.  */
  switch ((i.tm.base_opcode >> 8) & 0xff)
    {
    case 0:
      implied_prefix = 0;
      break;
    case DATA_PREFIX_OPCODE:
      implied_prefix = 1;
      break;
    case REPE_PREFIX_OPCODE:
      implied_prefix = 2;
      break;
    case REPNE_PREFIX_OPCODE:
      implied_prefix = 3;
      break;
    default:
      abort ();
    }

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw)
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;

  /* Use 2-byte VEX prefix if possible.  */
  if (w == 0
      && i.vec_encoding != vex_encoding_vex3
      && i.tm.opcode_modifier.vexopcode == VEX0F
      && (i.rex & (REX_W | REX_X | REX_B)) == 0)
    {
      /* 2-byte VEX prefix.  */
      unsigned int r;

      i.vex.length = 2;
      i.vex.bytes[0] = 0xc5;

      /* Check the REX.R bit.  It is stored inverted.  */
      r = (i.rex & REX_R) ? 0 : 1;
      i.vex.bytes[1] = (r << 7
			| register_specifier << 3
			| vector_length << 2
			| implied_prefix);
    }
  else
    {
      /* 3-byte VEX prefix.  */
      unsigned int m;

      i.vex.length = 3;

      /* Pick the opcode map selector and the leading byte: 0xc4 for
	 the VEX maps, 0x8f for the XOP maps.  */
      switch (i.tm.opcode_modifier.vexopcode)
	{
	case VEX0F:
	  m = 0x1;
	  i.vex.bytes[0] = 0xc4;
	  break;
	case VEX0F38:
	  m = 0x2;
	  i.vex.bytes[0] = 0xc4;
	  break;
	case VEX0F3A:
	  m = 0x3;
	  i.vex.bytes[0] = 0xc4;
	  break;
	case XOP08:
	  m = 0x8;
	  i.vex.bytes[0] = 0x8f;
	  break;
	case XOP09:
	  m = 0x9;
	  i.vex.bytes[0] = 0x8f;
	  break;
	case XOP0A:
	  m = 0xa;
	  i.vex.bytes[0] = 0x8f;
	  break;
	default:
	  abort ();
	}

      /* The high 3 bits of the second VEX byte are 1's complement
	 of RXB bits from REX.  */
      i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;

      i.vex.bytes[2] = (w << 7
			| register_specifier << 3
			| vector_length << 2
			| implied_prefix);
    }
}
3691 
3692 static INLINE bfd_boolean
3693 is_evex_encoding (const insn_template *t)
3694 {
3695   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3696 	 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3697 	 || t->opcode_modifier.sae;
3698 }
3699 
3700 static INLINE bfd_boolean
3701 is_any_vex_encoding (const insn_template *t)
3702 {
3703   return t->opcode_modifier.vex || t->opcode_modifier.vexopcode
3704 	 || is_evex_encoding (t);
3705 }
3706 
/* Build the 4-byte EVEX prefix for the current instruction `i'.

   The result is stored in i.vex.bytes with i.vex.length set to 4.
   The bits of i.vrex that get encoded are cleared from it, and
   i.tm.opcode_modifier.evex may be updated with the vector length
   derived from the operands.

   NOTE(review): i.vex.bytes[3] is only assigned or OR-ed into here, so
   it is presumably zero on entry -- confirm against the caller.

   Aborts on an opcode prefix byte, opcode map, or vector length that
   cannot be encoded.  */

static void
build_evex_prefix (void)
{
  unsigned int register_specifier;
  unsigned int implied_prefix;
  unsigned int m, w;
  rex_byte vrex_used = 0;

  /* Check register specifier.  */
  if (i.vex.register_specifier)
    {
      gas_assert ((i.vrex & REX_X) == 0);

      register_specifier = i.vex.register_specifier->reg_num;
      if ((i.vex.register_specifier->reg_flags & RegRex))
	register_specifier += 8;
      /* The upper 16 registers are encoded in the fourth byte of the
	 EVEX prefix.  The bit is stored inverted: it is set when the
	 register is NOT one of the upper 16.  */
      if (!(i.vex.register_specifier->reg_flags & RegVRex))
	i.vex.bytes[3] = 0x8;
      register_specifier = ~register_specifier & 0xf;
    }
  else
    {
      register_specifier = 0xf;

      /* Encode upper 16 vector index register in the fourth byte of
	 the EVEX prefix.  */
      if (!(i.vrex & REX_X))
	i.vex.bytes[3] = 0x8;
      else
	vrex_used |= REX_X;
    }

  /* The second byte of base_opcode selects the implied legacy prefix,
     encoded as a 2-bit value.  */
  switch ((i.tm.base_opcode >> 8) & 0xff)
    {
    case 0:
      implied_prefix = 0;
      break;
    case DATA_PREFIX_OPCODE:
      implied_prefix = 1;
      break;
    case REPE_PREFIX_OPCODE:
      implied_prefix = 2;
      break;
    case REPNE_PREFIX_OPCODE:
      implied_prefix = 3;
      break;
    default:
      abort ();
    }

  /* 4 byte EVEX prefix.  */
  i.vex.length = 4;
  i.vex.bytes[0] = 0x62;

  /* mmmm bits.  */
  switch (i.tm.opcode_modifier.vexopcode)
    {
    case VEX0F:
      m = 1;
      break;
    case VEX0F38:
      m = 2;
      break;
    case VEX0F3A:
      m = 3;
      break;
    default:
      abort ();
      break;
    }

  /* The high 3 bits of the second EVEX byte are 1's complement of RXB
     bits from REX.  */
  i.vex.bytes[1] = (~i.rex & 0x7) << 5 | m;

  /* The fifth bit of the second EVEX byte is 1's complement of the
     REX_R bit in VREX.  */
  if (!(i.vrex & REX_R))
    i.vex.bytes[1] |= 0x10;
  else
    vrex_used |= REX_R;

  if ((i.reg_operands + i.imm_operands) == i.operands)
    {
      /* When all operands are registers, the REX_X bit in REX is not
	 used.  We reuse it to encode the upper 16 registers, which is
	 indicated by the REX_B bit in VREX.  The REX_X bit is encoded
	 as 1's complement.  */
      if ((i.vrex & REX_B))
	{
	  vrex_used |= REX_B;
	  i.vex.bytes[1] &= ~0x40;
	}
    }

  /* EVEX instructions shouldn't need the REX prefix.  Every VREX bit
     must have been consumed above.  */
  i.vrex &= ~vrex_used;
  gas_assert (i.vrex == 0);

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw)
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;

  /* Encode the U bit.  */
  implied_prefix |= 0x4;

  /* The third byte of the EVEX prefix.  */
  i.vex.bytes[2] = (w << 7 | register_specifier << 3 | implied_prefix);

  /* The fourth byte of the EVEX prefix.  */
  /* The zeroing-masking bit.  */
  if (i.mask && i.mask->zeroing)
    i.vex.bytes[3] |= 0x80;

  /* Don't always set the broadcast bit if there is no RC.  */
  if (!i.rounding)
    {
      /* Encode the vector length.  */
      unsigned int vec_length;

      if (!i.tm.opcode_modifier.evex
	  || i.tm.opcode_modifier.evex == EVEXDYN)
	{
	  unsigned int op;

	  /* Determine vector length from the last multi-length vector
	     operand.  */
	  vec_length = 0;
	  for (op = i.operands; op--;)
	    if (i.tm.operand_types[op].bitfield.xmmword
		+ i.tm.operand_types[op].bitfield.ymmword
		+ i.tm.operand_types[op].bitfield.zmmword > 1)
	      {
		if (i.types[op].bitfield.zmmword)
		  {
		    i.tm.opcode_modifier.evex = EVEX512;
		    break;
		  }
		else if (i.types[op].bitfield.ymmword)
		  {
		    i.tm.opcode_modifier.evex = EVEX256;
		    break;
		  }
		else if (i.types[op].bitfield.xmmword)
		  {
		    i.tm.opcode_modifier.evex = EVEX128;
		    break;
		  }
		else if (i.broadcast && (int) op == i.broadcast->operand)
		  {
		    /* The operand itself has no vector size; use the
		       total size of the broadcast instead.  */
		    switch (i.broadcast->bytes)
		      {
			case 64:
			  i.tm.opcode_modifier.evex = EVEX512;
			  break;
			case 32:
			  i.tm.opcode_modifier.evex = EVEX256;
			  break;
			case 16:
			  i.tm.opcode_modifier.evex = EVEX128;
			  break;
			default:
			  abort ();
		      }
		    break;
		  }
	      }

	  /* If the loop ran out without a break, the unsigned counter
	     has wrapped around: no operand determined the length.  */
	  if (op >= MAX_OPERANDS)
	    abort ();
	}

      switch (i.tm.opcode_modifier.evex)
	{
	case EVEXLIG: /* LL' is ignored */
	  vec_length = evexlig << 5;
	  break;
	case EVEX128:
	  vec_length = 0 << 5;
	  break;
	case EVEX256:
	  vec_length = 1 << 5;
	  break;
	case EVEX512:
	  vec_length = 2 << 5;
	  break;
	default:
	  abort ();
	  break;
	}
      i.vex.bytes[3] |= vec_length;
      /* Encode the broadcast bit.  */
      if (i.broadcast)
	i.vex.bytes[3] |= 0x10;
    }
  else
    {
      /* With rounding control or SAE, bit 0x10 is always set and the
	 vector length bits carry the rounding type instead.  */
      if (i.rounding->type != saeonly)
	i.vex.bytes[3] |= 0x10 | (i.rounding->type << 5);
      else
	i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
    }

  /* The mask register number goes in the low bits.  */
  if (i.mask && i.mask->mask)
    i.vex.bytes[3] |= i.mask->mask->reg_num;
}
3921 
3922 static void
3923 process_immext (void)
3924 {
3925   expressionS *exp;
3926 
3927   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
3928      which is coded in the same place as an 8-bit immediate field
3929      would be.  Here we fake an 8-bit immediate operand from the
3930      opcode suffix stored in tm.extension_opcode.
3931 
3932      AVX instructions also use this encoding, for some of
3933      3 argument instructions.  */
3934 
3935   gas_assert (i.imm_operands <= 1
3936 	      && (i.operands <= 2
3937 		  || (is_any_vex_encoding (&i.tm)
3938 		      && i.operands <= 4)));
3939 
3940   exp = &im_expressions[i.imm_operands++];
3941   i.op[i.operands].imms = exp;
3942   i.types[i.operands] = imm8;
3943   i.operands++;
3944   exp->X_op = O_constant;
3945   exp->X_add_number = i.tm.extension_opcode;
3946   i.tm.extension_opcode = None;
3947 }
3948 
3949 
3950 static int
3951 check_hle (void)
3952 {
3953   switch (i.tm.opcode_modifier.hleprefixok)
3954     {
3955     default:
3956       abort ();
3957     case HLEPrefixNone:
3958       as_bad (_("invalid instruction `%s' after `%s'"),
3959 	      i.tm.name, i.hle_prefix);
3960       return 0;
3961     case HLEPrefixLock:
3962       if (i.prefix[LOCK_PREFIX])
3963 	return 1;
3964       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
3965       return 0;
3966     case HLEPrefixAny:
3967       return 1;
3968     case HLEPrefixRelease:
3969       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
3970 	{
3971 	  as_bad (_("instruction `%s' after `xacquire' not allowed"),
3972 		  i.tm.name);
3973 	  return 0;
3974 	}
3975       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
3976 	{
3977 	  as_bad (_("memory destination needed for instruction `%s'"
3978 		    " after `xrelease'"), i.tm.name);
3979 	  return 0;
3980 	}
3981       return 1;
3982     }
3983 }
3984 
3985 /* Try the shortest encoding by shortening operand size.

   Operates in place on the global instruction state `i' (the matched
   template i.tm, the operand types i.types[] and the operands i.op[]).
   The patterns below form a single if / else-if chain, so at most one
   rewrite is applied per call; when no pattern matches, `i' is left
   untouched.  Takes no arguments and returns nothing.  */
3986 
3987 static void
3988 optimize_encoding (void)
3989 {
3990   unsigned int j;
3991 
3992   if (optimize_for_space
3993       && !is_any_vex_encoding (&i.tm)
3994       && i.reg_operands == 1
3995       && i.imm_operands == 1
3996       && !i.types[1].bitfield.byte
3997       && i.op[0].imms->X_op == O_constant
3998       && fits_in_imm7 (i.op[0].imms->X_add_number)
3999       && (i.tm.base_opcode == 0xa8
4000 	  || (i.tm.base_opcode == 0xf6
4001 	      && i.tm.extension_opcode == 0x0)))
4002     {
4003       /* Optimize: -Os:
4004 	   test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4005        */
4006       unsigned int base_regnum = i.op[1].regs->reg_num;
4007       if (flag_code == CODE_64BIT || base_regnum < 4)
4008 	{
4009 	  i.types[1].bitfield.byte = 1;
4010 	  /* Ignore the suffix.  */
4011 	  i.suffix = 0;
4012 	  /* Convert to byte registers.  */
4013 	  if (i.types[1].bitfield.word)
4014 	    j = 16;
4015 	  else if (i.types[1].bitfield.dword)
4016 	    j = 32;
4017 	  else
4018 	    j = 48;
	  /* NOTE(review): the pointer arithmetic below presumably relies on
	     the layout of the register table (the byte register sitting J
	     entries before the matching word/dword/qword register) --
	     confirm against the table definition.  */
4019 	  if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4020 	    j += 8;
4021 	  i.op[1].regs -= j;
4022 	}
4023     }
4024   else if (flag_code == CODE_64BIT
4025 	   && !is_any_vex_encoding (&i.tm)
4026 	   && ((i.types[1].bitfield.qword
4027 		&& i.reg_operands == 1
4028 		&& i.imm_operands == 1
4029 		&& i.op[0].imms->X_op == O_constant
4030 		&& ((i.tm.base_opcode == 0xb8
4031 		     && i.tm.extension_opcode == None
4032 		     && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4033 		    || (fits_in_imm31 (i.op[0].imms->X_add_number)
4034 			&& ((i.tm.base_opcode == 0x24
4035 			     || i.tm.base_opcode == 0xa8)
4036 			    || (i.tm.base_opcode == 0x80
4037 				&& i.tm.extension_opcode == 0x4)
4038 			    || ((i.tm.base_opcode == 0xf6
4039 				 || (i.tm.base_opcode | 1) == 0xc7)
4040 				&& i.tm.extension_opcode == 0x0)))
4041 		    || (fits_in_imm7 (i.op[0].imms->X_add_number)
4042 			&& i.tm.base_opcode == 0x83
4043 			&& i.tm.extension_opcode == 0x4)))
4044 	       || (i.types[0].bitfield.qword
4045 		   && ((i.reg_operands == 2
4046 			&& i.op[0].regs == i.op[1].regs
4047 			&& (i.tm.base_opcode == 0x30
4048 			    || i.tm.base_opcode == 0x28))
4049 		       || (i.reg_operands == 1
4050 			   && i.operands == 1
4051 			   && i.tm.base_opcode == 0x30)))))
4052     {
4053       /* Optimize: -O:
4054 	   andq $imm31, %r64   -> andl $imm31, %r32
4055 	   andq $imm7, %r64    -> andl $imm7, %r32
4056 	   testq $imm31, %r64  -> testl $imm31, %r32
4057 	   xorq %r64, %r64     -> xorl %r32, %r32
4058 	   subq %r64, %r64     -> subl %r32, %r32
4059 	   movq $imm31, %r64   -> movl $imm31, %r32
4060 	   movq $imm32, %r64   -> movl $imm32, %r32
4061         */
      /* NOTE(review): norex64 presumably keeps REX.W from being emitted
	 for the shortened form -- confirm where the flag is consumed.  */
4062       i.tm.opcode_modifier.norex64 = 1;
4063       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4064 	{
4065 	  /* Handle
4066 	       movq $imm31, %r64   -> movl $imm31, %r32
4067 	       movq $imm32, %r64   -> movl $imm32, %r32
4068 	   */
4069 	  i.tm.operand_types[0].bitfield.imm32 = 1;
4070 	  i.tm.operand_types[0].bitfield.imm32s = 0;
4071 	  i.tm.operand_types[0].bitfield.imm64 = 0;
4072 	  i.types[0].bitfield.imm32 = 1;
4073 	  i.types[0].bitfield.imm32s = 0;
4074 	  i.types[0].bitfield.imm64 = 0;
4075 	  i.types[1].bitfield.dword = 1;
4076 	  i.types[1].bitfield.qword = 0;
4077 	  if ((i.tm.base_opcode | 1) == 0xc7)
4078 	    {
4079 	      /* Handle
4080 		   movq $imm31, %r64   -> movl $imm31, %r32
4081 	       */
4082 	      i.tm.base_opcode = 0xb8;
4083 	      i.tm.extension_opcode = None;
4084 	      i.tm.opcode_modifier.w = 0;
4085 	      i.tm.opcode_modifier.shortform = 1;
4086 	      i.tm.opcode_modifier.modrm = 0;
4087 	    }
4088 	}
4089     }
4090   else if (optimize > 1
4091 	   && !optimize_for_space
4092 	   && !is_any_vex_encoding (&i.tm)
4093 	   && i.reg_operands == 2
4094 	   && i.op[0].regs == i.op[1].regs
4095 	   && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4096 	       || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4097 	   && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4098     {
4099       /* Optimize: -O2:
4100 	   andb %rN, %rN  -> testb %rN, %rN
4101 	   andw %rN, %rN  -> testw %rN, %rN
4102 	   andq %rN, %rN  -> testq %rN, %rN
4103 	   orb %rN, %rN   -> testb %rN, %rN
4104 	   orw %rN, %rN   -> testw %rN, %rN
4105 	   orq %rN, %rN   -> testq %rN, %rN
4106 
4107 	   and outside of 64-bit mode
4108 
4109 	   andl %rN, %rN  -> testl %rN, %rN
4110 	   orl %rN, %rN   -> testl %rN, %rN
4111        */
      /* Replace the opcode with 0x84 (the test form listed above) while
	 keeping bit 0 of the original opcode.  */
4112       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4113     }
4114   else if (i.reg_operands == 3
4115 	   && i.op[0].regs == i.op[1].regs
4116 	   && !i.types[2].bitfield.xmmword
4117 	   && (i.tm.opcode_modifier.vex
4118 	       || ((!i.mask || i.mask->zeroing)
4119 		   && !i.rounding
4120 		   && is_evex_encoding (&i.tm)
4121 		   && (i.vec_encoding != vex_encoding_evex
4122 		       || cpu_arch_isa_flags.bitfield.cpuavx512vl
4123 		       || i.tm.cpu_flags.bitfield.cpuavx512vl
4124 		       || (i.tm.operand_types[2].bitfield.zmmword
4125 			   && i.types[2].bitfield.ymmword))))
4126 	   && ((i.tm.base_opcode == 0x55
4127 		|| i.tm.base_opcode == 0x6655
4128 		|| i.tm.base_opcode == 0x66df
4129 		|| i.tm.base_opcode == 0x57
4130 		|| i.tm.base_opcode == 0x6657
4131 		|| i.tm.base_opcode == 0x66ef
4132 		|| i.tm.base_opcode == 0x66f8
4133 		|| i.tm.base_opcode == 0x66f9
4134 		|| i.tm.base_opcode == 0x66fa
4135 		|| i.tm.base_opcode == 0x66fb
4136 		|| i.tm.base_opcode == 0x42
4137 		|| i.tm.base_opcode == 0x6642
4138 		|| i.tm.base_opcode == 0x47
4139 		|| i.tm.base_opcode == 0x6647)
4140 	       && i.tm.extension_opcode == None))
4141     {
4142       /* Optimize: -O1:
4143 	   VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4144 	   vpsubq and vpsubw:
4145 	     EVEX VOP %zmmM, %zmmM, %zmmN
4146 	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4147 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4148 	     EVEX VOP %ymmM, %ymmM, %ymmN
4149 	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4150 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4151 	     VEX VOP %ymmM, %ymmM, %ymmN
4152 	       -> VEX VOP %xmmM, %xmmM, %xmmN
4153 	   VOP, one of vpandn and vpxor:
4154 	     VEX VOP %ymmM, %ymmM, %ymmN
4155 	       -> VEX VOP %xmmM, %xmmM, %xmmN
4156 	   VOP, one of vpandnd and vpandnq:
4157 	     EVEX VOP %zmmM, %zmmM, %zmmN
4158 	       -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4159 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4160 	     EVEX VOP %ymmM, %ymmM, %ymmN
4161 	       -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4162 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4163 	   VOP, one of vpxord and vpxorq:
4164 	     EVEX VOP %zmmM, %zmmM, %zmmN
4165 	       -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4166 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4167 	     EVEX VOP %ymmM, %ymmM, %ymmN
4168 	       -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4169 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4170 	   VOP, one of kxord and kxorq:
4171 	     VEX VOP %kM, %kM, %kN
4172 	       -> VEX kxorw %kM, %kM, %kN
4173 	   VOP, one of kandnd and kandnq:
4174 	     VEX VOP %kM, %kM, %kN
4175 	       -> VEX kandnw %kM, %kM, %kN
4176        */
4177       if (is_evex_encoding (&i.tm))
4178 	{
	  /* Unless EVEX encoding was explicitly requested
	     (i.vec_encoding == vex_encoding_evex) the insn is switched to
	     VEX128.  With such a request only the EVEX vector length is
	     reduced, and only when optimize > 1; otherwise nothing is
	     changed at all.  */
4179 	  if (i.vec_encoding != vex_encoding_evex)
4180 	    {
4181 	      i.tm.opcode_modifier.vex = VEX128;
4182 	      i.tm.opcode_modifier.vexw = VEXW0;
4183 	      i.tm.opcode_modifier.evex = 0;
4184 	    }
4185 	  else if (optimize > 1)
4186 	    i.tm.opcode_modifier.evex = EVEX128;
4187 	  else
4188 	    return;
4189 	}
4190       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4191 	{
4192 	  i.tm.base_opcode &= 0xff;
4193 	  i.tm.opcode_modifier.vexw = VEXW0;
4194 	}
4195       else
4196 	i.tm.opcode_modifier.vex = VEX128;
4197 
      /* The operand types are narrowed to xmmword only when the result is
	 VEX encoded.  */
4198       if (i.tm.opcode_modifier.vex)
4199 	for (j = 0; j < 3; j++)
4200 	  {
4201 	    i.types[j].bitfield.xmmword = 1;
4202 	    i.types[j].bitfield.ymmword = 0;
4203 	  }
4204     }
4205   else if (i.vec_encoding != vex_encoding_evex
4206 	   && !i.types[0].bitfield.zmmword
4207 	   && !i.types[1].bitfield.zmmword
4208 	   && !i.mask
4209 	   && !i.broadcast
4210 	   && is_evex_encoding (&i.tm)
4211 	   && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x666f
4212 	       || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf36f
4213 	       || (i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f
4214 	       || (i.tm.base_opcode & ~4) == 0x66db
4215 	       || (i.tm.base_opcode & ~4) == 0x66eb)
4216 	   && i.tm.extension_opcode == None)
4217     {
4218       /* Optimize: -O1:
4219 	   VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4220 	   vmovdqu32 and vmovdqu64:
4221 	     EVEX VOP %xmmM, %xmmN
4222 	       -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4223 	     EVEX VOP %ymmM, %ymmN
4224 	       -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4225 	     EVEX VOP %xmmM, mem
4226 	       -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4227 	     EVEX VOP %ymmM, mem
4228 	       -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4229 	     EVEX VOP mem, %xmmN
4230 	       -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4231 	     EVEX VOP mem, %ymmN
4232 	       -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4233 	   VOP, one of vpand, vpandn, vpor, vpxor:
4234 	     EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4235 	       -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4236 	     EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4237 	       -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4238 	     EVEX VOP{d,q} mem, %xmmM, %xmmN
4239 	       -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4240 	     EVEX VOP{d,q} mem, %ymmM, %ymmN
4241 	       -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4242        */
4243       for (j = 0; j < i.operands; j++)
4244 	if (operand_type_check (i.types[j], disp)
4245 	    && i.op[j].disps->X_op == O_constant)
4246 	  {
4247 	    /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4248 	       has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4249 	       bytes, we choose EVEX Disp8 over VEX Disp32.  */
4250 	    int evex_disp8, vex_disp8;
4251 	    unsigned int memshift = i.memshift;
4252 	    offsetT n = i.op[j].disps->X_add_number;
4253 
	    /* Test the displacement twice: once with the current
	       i.memshift and once with i.memshift cleared.
	       NOTE(review): this assumes fits_in_disp8 consults
	       i.memshift -- confirm.  If the two answers differ,
	       i.memshift is restored and the insn is left alone.  */
4254 	    evex_disp8 = fits_in_disp8 (n);
4255 	    i.memshift = 0;
4256 	    vex_disp8 = fits_in_disp8 (n);
4257 	    if (evex_disp8 != vex_disp8)
4258 	      {
4259 		i.memshift = memshift;
4260 		return;
4261 	      }
4262 
4263 	    i.types[j].bitfield.disp8 = vex_disp8;
4264 	    break;
4265 	  }
      /* Fold the 0xf26f opcodes into their 0xf36f counterparts; the XOR
	 flips only the 0x100 bit that distinguishes the two values.  */
4266       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0xf26f)
4267 	i.tm.base_opcode ^= 0xf36f ^ 0xf26f;
4268       i.tm.opcode_modifier.vex
4269 	= i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4270       i.tm.opcode_modifier.vexw = VEXW0;
4271       /* VPAND, VPOR, and VPXOR are commutative.  */
4272       if (i.reg_operands == 3 && i.tm.base_opcode != 0x66df)
4273 	i.tm.opcode_modifier.commutative = 1;
4274       i.tm.opcode_modifier.evex = 0;
4275       i.tm.opcode_modifier.masking = 0;
4276       i.tm.opcode_modifier.broadcast = 0;
4277       i.tm.opcode_modifier.disp8memshift = 0;
4278       i.memshift = 0;
      /* J is below i.operands only if the loop above stopped at an operand
	 with a constant displacement; re-evaluate that operand's disp8
	 flag now that i.memshift is 0.  */
4279       if (j < i.operands)
4280 	i.types[j].bitfield.disp8
4281 	  = fits_in_disp8 (i.op[j].disps->X_add_number);
4282     }
4283 }
4284 
4285 /* This is the guts of the machine-dependent assembler.  LINE points to a
4286    machine dependent instruction.  This function is supposed to emit
4287    the frags/bytes it assembles to.

   All work is done on the global instruction state `i', which is cleared
   on entry.  The stages run in a fixed order: parse_insn, parse_operands,
   operand swapping, optimize_imm / optimize_disp, match_template, the
   prefix sanity checks, optimize_encoding, process_suffix, finalize_imm,
   process_operands, VEX/EVEX prefix construction, REX handling and
   finally output_insn.  Most failures return at once without emitting
   anything; a few checks (BND, NOTRACK, MPX addressing, movzx/movsx
   operand size) only call as_bad and carry on.  */
4288 
4289 void
4290 md_assemble (char *line)
4291 {
4292   unsigned int j;
4293   char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4294   const insn_template *t;
4295 
4296   /* Initialize globals.  */
4297   memset (&i, '\0', sizeof (i));
4298   for (j = 0; j < MAX_OPERANDS; j++)
4299     i.reloc[j] = NO_RELOC;
4300   memset (disp_expressions, '\0', sizeof (disp_expressions));
4301   memset (im_expressions, '\0', sizeof (im_expressions));
4302   save_stack_p = save_stack;
4303 
4304   /* First parse an instruction mnemonic & call i386_operand for the operands.
4305      We assume that the scrubber has arranged it so that line[0] is the valid
4306      start of a (possibly prefixed) mnemonic.  */
4307 
4308   line = parse_insn (line, mnemonic);
4309   if (line == NULL)
4310     return;
  /* Remember whatever suffix parse_insn left in i.suffix; it is handed
     to match_template below.  */
4311   mnem_suffix = i.suffix;
4312 
  /* The clean-up following the call runs before the error check, so it
     also happens when parse_operands failed.  */
4313   line = parse_operands (line, mnemonic);
4314   this_operand = -1;
4315   xfree (i.memop1_string);
4316   i.memop1_string = NULL;
4317   if (line == NULL)
4318     return;
4319 
4320   /* Now we've parsed the mnemonic into a set of templates, and have the
4321      operands at hand.  */
4322 
4323   /* All intel opcodes have reversed operands except for "bound" and
4324      "enter".  We also don't reverse intersegment "jmp" and "call"
4325      instructions with 2 immediate operands so that the immediate segment
4326      precedes the offset, as it does when in AT&T mode.
     The test below additionally leaves "invlpga" unswapped, and skips the
     swap whenever the first two operands are both immediates.  */
4327   if (intel_syntax
4328       && i.operands > 1
4329       && (strcmp (mnemonic, "bound") != 0)
4330       && (strcmp (mnemonic, "invlpga") != 0)
4331       && !(operand_type_check (i.types[0], imm)
4332 	   && operand_type_check (i.types[1], imm)))
4333     swap_operands ();
4334 
4335   /* The order of the immediates should be reversed
4336      for 2 immediates extrq and insertq instructions */
4337   if (i.imm_operands == 2
4338       && (strcmp (mnemonic, "extrq") == 0
4339 	  || strcmp (mnemonic, "insertq") == 0))
4340       swap_2_operands (0, 1);
4341 
4342   if (i.imm_operands)
4343     optimize_imm ();
4344 
4345   /* Don't optimize displacement for movabs since it only takes 64bit
4346      displacement.  */
4347   if (i.disp_operands
4348       && i.disp_encoding != disp_encoding_32bit
4349       && (flag_code != CODE_64BIT
4350 	  || strcmp (mnemonic, "movabs") != 0))
4351     optimize_disp ();
4352 
4353   /* Next, we find a template that matches the given insn,
4354      making sure the overlap of the given operands types is consistent
4355      with the template operand types.  */
4356 
4357   if (!(t = match_template (mnem_suffix)))
4358     return;
4359 
  /* Diagnose use of an SSE-family insn when sse_check asks for it; the
     setting selects between a warning and an error.  */
4360   if (sse_check != check_none
4361       && !i.tm.opcode_modifier.noavx
4362       && !i.tm.cpu_flags.bitfield.cpuavx
4363       && !i.tm.cpu_flags.bitfield.cpuavx512f
4364       && (i.tm.cpu_flags.bitfield.cpusse
4365 	  || i.tm.cpu_flags.bitfield.cpusse2
4366 	  || i.tm.cpu_flags.bitfield.cpusse3
4367 	  || i.tm.cpu_flags.bitfield.cpussse3
4368 	  || i.tm.cpu_flags.bitfield.cpusse4_1
4369 	  || i.tm.cpu_flags.bitfield.cpusse4_2
4370 	  || i.tm.cpu_flags.bitfield.cpusse4a
4371 	  || i.tm.cpu_flags.bitfield.cpupclmul
4372 	  || i.tm.cpu_flags.bitfield.cpuaes
4373 	  || i.tm.cpu_flags.bitfield.cpusha
4374 	  || i.tm.cpu_flags.bitfield.cpugfni))
4375     {
4376       (sse_check == check_warning
4377        ? as_warn
4378        : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4379     }
4380 
4381   /* Zap movzx and movsx suffix.  The suffix has been set from
4382      "word ptr" or "byte ptr" on the source operand in Intel syntax
4383      or extracted from mnemonic in AT&T syntax.  But we'll use
4384      the destination register to choose the suffix for encoding.  */
4385   if ((i.tm.base_opcode & ~9) == 0x0fb6)
4386     {
4387       /* In Intel syntax, there must be a suffix.  In AT&T syntax, if
4388 	 there is no suffix, the default will be byte extension.  */
4389       if (i.reg_operands != 2
4390 	  && !i.suffix
4391 	  && intel_syntax)
4392 	as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
4393 
4394       i.suffix = 0;
4395     }
4396 
4397   if (i.tm.opcode_modifier.fwait)
4398     if (!add_prefix (FWAIT_OPCODE))
4399       return;
4400 
4401   /* Check if REP prefix is OK.  */
4402   if (i.rep_prefix && !i.tm.opcode_modifier.repprefixok)
4403     {
4404       as_bad (_("invalid instruction `%s' after `%s'"),
4405 		i.tm.name, i.rep_prefix);
4406       return;
4407     }
4408 
4409   /* Check for lock without a lockable instruction.  Destination operand
4410      must be memory unless it is xchg (0x86).  */
4411   if (i.prefix[LOCK_PREFIX]
4412       && (!i.tm.opcode_modifier.islockable
4413 	  || i.mem_operands == 0
4414 	  || (i.tm.base_opcode != 0x86
4415 	      && !(i.flags[i.operands - 1] & Operand_Mem))))
4416     {
4417       as_bad (_("expecting lockable instruction after `lock'"));
4418       return;
4419     }
4420 
4421   /* Check for data size prefix on VEX/XOP/EVEX encoded insns.  */
4422   if (i.prefix[DATA_PREFIX] && is_any_vex_encoding (&i.tm))
4423     {
4424       as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
4425       return;
4426     }
4427 
4428   /* Check if HLE prefix is OK.  */
4429   if (i.hle_prefix && !check_hle ())
4430     return;
4431 
4432   /* Check BND prefix.  */
4433   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
4434     as_bad (_("expecting valid branch instruction after `bnd'"));
4435 
4436   /* Check NOTRACK prefix.  */
4437   if (i.notrack_prefix && !i.tm.opcode_modifier.notrackprefixok)
4438     as_bad (_("expecting indirect branch instruction after `notrack'"));
4439 
  /* Address size restrictions for MPX insns; these are reported but do
     not stop assembly of the insn.  */
4440   if (i.tm.cpu_flags.bitfield.cpumpx)
4441     {
4442       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4443 	as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
4444       else if (flag_code != CODE_16BIT
4445 	       ? i.prefix[ADDR_PREFIX]
4446 	       : i.mem_operands && !i.prefix[ADDR_PREFIX])
4447 	as_bad (_("16-bit address isn't allowed in MPX instructions"));
4448     }
4449 
4450   /* Insert BND prefix.  */
4451   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
4452     {
4453       if (!i.prefix[BND_PREFIX])
4454 	add_prefix (BND_PREFIX_OPCODE);
4455       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
4456 	{
4457 	  as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
4458 	  i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
4459 	}
4460     }
4461 
4462   /* Check string instruction segment overrides.  */
4463   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
4464     {
4465       gas_assert (i.mem_operands);
4466       if (!check_string ())
4467 	return;
4468       i.disp_operands = 0;
4469     }
4470 
  /* Encoding optimizations run only when optimization is enabled, the
     {nooptimize} pseudo prefix was not given (i.no_optimize) and the
     template permits it.  */
4471   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
4472     optimize_encoding ();
4473 
4474   if (!process_suffix ())
4475     return;
4476 
4477   /* Update operand types.  */
4478   for (j = 0; j < i.operands; j++)
4479     i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
4480 
4481   /* Make still unresolved immediate matches conform to size of immediate
4482      given in i.suffix.  */
4483   if (!finalize_imm ())
4484     return;
4485 
4486   if (i.types[0].bitfield.imm1)
4487     i.imm_operands = 0;	/* kludge for shift insns.  */
4488 
4489   /* We only need to check those implicit registers for instructions
4490      with 3 operands or less.  */
4491   if (i.operands <= 3)
4492     for (j = 0; j < i.operands; j++)
4493       if (i.types[j].bitfield.instance != InstanceNone
4494 	  && !i.types[j].bitfield.xmmword)
4495 	i.reg_operands--;
4496 
4497   /* ImmExt should be processed after SSE2AVX.  */
4498   if (!i.tm.opcode_modifier.sse2avx
4499       && i.tm.opcode_modifier.immext)
4500     process_immext ();
4501 
4502   /* For insns with operands there are more diddles to do to the opcode.  */
4503   if (i.operands)
4504     {
4505       if (!process_operands ())
4506 	return;
4507     }
4508   else if (!quiet_warnings && i.tm.opcode_modifier.ugh)
4509     {
4510       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
4511       as_warn (_("translating to `%sp'"), i.tm.name);
4512     }
4513 
4514   if (is_any_vex_encoding (&i.tm))
4515     {
4516       if (!cpu_arch_flags.bitfield.cpui286)
4517 	{
4518 	  as_bad (_("instruction `%s' isn't supported outside of protected mode."),
4519 		  i.tm.name);
4520 	  return;
4521 	}
4522 
      /* T is the template match_template returned above.  */
4523       if (i.tm.opcode_modifier.vex)
4524 	build_vex_prefix (t);
4525       else
4526 	build_evex_prefix ();
4527     }
4528 
4529   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
4530      instructions may define INT_OPCODE as well, so avoid this corner
4531      case for those instructions that use MODRM.  */
4532   if (i.tm.base_opcode == INT_OPCODE
4533       && !i.tm.opcode_modifier.modrm
4534       && i.op[0].imms->X_add_number == 3)
4535     {
4536       i.tm.base_opcode = INT3_OPCODE;
4537       i.imm_operands = 0;
4538     }
4539 
4540   if ((i.tm.opcode_modifier.jump == JUMP
4541        || i.tm.opcode_modifier.jump == JUMP_BYTE
4542        || i.tm.opcode_modifier.jump == JUMP_DWORD)
4543       && i.op[0].disps->X_op == O_constant)
4544     {
4545       /* Convert "jmp constant" (and "call constant") to a jump (call) to
4546 	 the absolute address given by the constant.  Since ix86 jumps and
4547 	 calls are pc relative, we need to generate a reloc.  */
4548       i.op[0].disps->X_add_symbol = &abs_symbol;
4549       i.op[0].disps->X_op = O_symbol;
4550     }
4551 
4552   if (i.tm.opcode_modifier.rex64)
4553     i.rex |= REX_W;
4554 
4555   /* For 8 bit registers we need an empty rex prefix.  Also if the
4556      instruction already has a prefix, we need to convert old
4557      registers to new ones.  */
4558 
4559   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
4560        && (i.op[0].regs->reg_flags & RegRex64) != 0)
4561       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
4562 	  && (i.op[1].regs->reg_flags & RegRex64) != 0)
4563       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
4564 	   || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
4565 	  && i.rex != 0))
4566     {
4567       int x;
4568 
4569       i.rex |= REX_OPCODE;
4570       for (x = 0; x < 2; x++)
4571 	{
4572 	  /* Look for 8 bit operand that uses old registers.  */
4573 	  if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
4574 	      && (i.op[x].regs->reg_flags & RegRex64) == 0)
4575 	    {
4576 	      gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4577 	      /* In case it is "hi" register, give up.  */
4578 	      if (i.op[x].regs->reg_num > 3)
4579 		as_bad (_("can't encode register '%s%s' in an "
4580 			  "instruction requiring REX prefix."),
4581 			register_prefix, i.op[x].regs->reg_name);
4582 
4583 	      /* Otherwise it is equivalent to the extended register.
4584 		 Since the encoding doesn't change this is merely
4585 		 cosmetic cleanup for debug output.  */
4586 
4587 	      i.op[x].regs = i.op[x].regs + 8;
4588 	    }
4589 	}
4590     }
4591 
  /* The {rex} pseudo prefix (i.rex_encoding) asks for a REX byte even
     though no REX bit has been collected so far.  */
4592   if (i.rex == 0 && i.rex_encoding)
4593     {
4594       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
4595 	 that uses legacy register.  If it is "hi" register, don't add
4596 	 the REX_OPCODE byte.  */
4597       int x;
4598       for (x = 0; x < 2; x++)
4599 	if (i.types[x].bitfield.class == Reg
4600 	    && i.types[x].bitfield.byte
4601 	    && (i.op[x].regs->reg_flags & RegRex64) == 0
4602 	    && i.op[x].regs->reg_num > 3)
4603 	  {
4604 	    gas_assert (!(i.op[x].regs->reg_flags & RegRex));
4605 	    i.rex_encoding = FALSE;
4606 	    break;
4607 	  }
4608 
4609       if (i.rex_encoding)
4610 	i.rex = REX_OPCODE;
4611     }
4612 
  /* Add the REX prefix built from the collected bits.  */
4613   if (i.rex != 0)
4614     add_prefix (REX_OPCODE | i.rex);
4615 
4616   /* We are ready to output the insn.  */
4617   output_insn ();
4618 
  /* Record what was just emitted in last_insn: the current section, and
     whether the insn was a prefix template (together with its name and
     the file/line reported by as_where) or anything else.  */
4619   last_insn.seg = now_seg;
4620 
4621   if (i.tm.opcode_modifier.isprefix)
4622     {
4623       last_insn.kind = last_insn_prefix;
4624       last_insn.name = i.tm.name;
4625       last_insn.file = as_where (&last_insn.line);
4626     }
4627   else
4628     last_insn.kind = last_insn_other;
4629 }
4630 
/* Parse the (possibly prefixed) instruction mnemonic starting at LINE.

   Each token is copied into MNEMONIC through the mnemonic_chars[]
   translation table; a token that would fill MAX_MNEM_SIZE bytes is
   rejected.  A token that names a prefix and is followed by more text is
   recorded in the global state `i' (through add_prefix, or directly for
   the {...} pseudo prefixes) and parsing continues with the next token.
   The last token is looked up in op_hash, if necessary after stripping a
   legacy ".s" / ".d8" / ".d32" suffix or a size suffix (which is stored
   in i.suffix).

   On success current_templates is set to the templates found and the
   return value points just past the mnemonic and any ",pt" / ",pn"
   branch hint.  On failure NULL is returned; every failure path calls
   as_bad here, except where add_prefix itself signalled the problem.  */
4631 static char *
4632 parse_insn (char *line, char *mnemonic)
4633 {
4634   char *l = line;
4635   char *token_start = l;
4636   char *mnem_p;
4637   int supported;
4638   const insn_template *t;
4639   char *dot_p = NULL;
4640 
  /* Each iteration consumes one token: either a prefix (loop again) or
     the mnemonic proper (break).  */
4641   while (1)
4642     {
4643       mnem_p = mnemonic;
4644       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
4645 	{
	  /* Remember the most recent '.' for the legacy suffix handling
	     after the loop.  */
4646 	  if (*mnem_p == '.')
4647 	    dot_p = mnem_p;
4648 	  mnem_p++;
4649 	  if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
4650 	    {
4651 	      as_bad (_("no such instruction: `%s'"), token_start);
4652 	      return NULL;
4653 	    }
4654 	  l++;
4655 	}
4656       if (!is_space_char (*l)
4657 	  && *l != END_OF_INSN
4658 	  && (intel_syntax
4659 	      || (*l != PREFIX_SEPARATOR
4660 		  && *l != ',')))
4661 	{
4662 	  as_bad (_("invalid character %s in mnemonic"),
4663 		  output_invalid (*l));
4664 	  return NULL;
4665 	}
4666       if (token_start == l)
4667 	{
4668 	  if (!intel_syntax && *l == PREFIX_SEPARATOR)
4669 	    as_bad (_("expecting prefix; got nothing"));
4670 	  else
4671 	    as_bad (_("expecting mnemonic; got nothing"));
4672 	  return NULL;
4673 	}
4674 
4675       /* Look up instruction (or prefix) via hash table.  */
4676       current_templates = (const templates *) hash_find (op_hash, mnemonic);
4677 
      /* The token is handled as a prefix only when something other than
	 end of insn (optionally preceded by one space character) follows
	 it; a prefix standing last on the line falls through to the
	 ordinary mnemonic handling after the loop.  */
4678       if (*l != END_OF_INSN
4679 	  && (!is_space_char (*l) || l[1] != END_OF_INSN)
4680 	  && current_templates
4681 	  && current_templates->start->opcode_modifier.isprefix)
4682 	{
4683 	  if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
4684 	    {
4685 	      as_bad ((flag_code != CODE_64BIT
4686 		       ? _("`%s' is only supported in 64-bit mode")
4687 		       : _("`%s' is not supported in 64-bit mode")),
4688 		      current_templates->start->name);
4689 	      return NULL;
4690 	    }
4691 	  /* If we are in 16-bit mode, do not allow addr16 or data16.
4692 	     Similarly, in 32-bit mode, do not allow addr32 or data32.  */
4693 	  if ((current_templates->start->opcode_modifier.size == SIZE16
4694 	       || current_templates->start->opcode_modifier.size == SIZE32)
4695 	      && flag_code != CODE_64BIT
4696 	      && ((current_templates->start->opcode_modifier.size == SIZE32)
4697 		  ^ (flag_code == CODE_16BIT)))
4698 	    {
4699 	      as_bad (_("redundant %s prefix"),
4700 		      current_templates->start->name);
4701 	      return NULL;
4702 	    }
4703 	  if (current_templates->start->opcode_length == 0)
4704 	    {
4705 	      /* Handle pseudo prefixes.  */
4706 	      switch (current_templates->start->base_opcode)
4707 		{
4708 		case 0x0:
4709 		  /* {disp8} */
4710 		  i.disp_encoding = disp_encoding_8bit;
4711 		  break;
4712 		case 0x1:
4713 		  /* {disp32} */
4714 		  i.disp_encoding = disp_encoding_32bit;
4715 		  break;
4716 		case 0x2:
4717 		  /* {load} */
4718 		  i.dir_encoding = dir_encoding_load;
4719 		  break;
4720 		case 0x3:
4721 		  /* {store} */
4722 		  i.dir_encoding = dir_encoding_store;
4723 		  break;
4724 		case 0x4:
4725 		  /* {vex} */
4726 		  i.vec_encoding = vex_encoding_vex;
4727 		  break;
4728 		case 0x5:
4729 		  /* {vex3} */
4730 		  i.vec_encoding = vex_encoding_vex3;
4731 		  break;
4732 		case 0x6:
4733 		  /* {evex} */
4734 		  i.vec_encoding = vex_encoding_evex;
4735 		  break;
4736 		case 0x7:
4737 		  /* {rex} */
4738 		  i.rex_encoding = TRUE;
4739 		  break;
4740 		case 0x8:
4741 		  /* {nooptimize} */
4742 		  i.no_optimize = TRUE;
4743 		  break;
4744 		default:
4745 		  abort ();
4746 		}
4747 	    }
4748 	  else
4749 	    {
4750 	      /* Add prefix, checking for repeated prefixes.  */
4751 	      switch (add_prefix (current_templates->start->base_opcode))
4752 		{
4753 		case PREFIX_EXIST:
4754 		  return NULL;
4755 		case PREFIX_DS:
		  /* Record the prefix name as a NOTRACK prefix when the
		     template carries the cpuibt flag.  */
4756 		  if (current_templates->start->cpu_flags.bitfield.cpuibt)
4757 		    i.notrack_prefix = current_templates->start->name;
4758 		  break;
4759 		case PREFIX_REP:
		  /* The template's cpu flags decide whether the name is
		     recorded as an HLE, a BND or a plain REP prefix.  */
4760 		  if (current_templates->start->cpu_flags.bitfield.cpuhle)
4761 		    i.hle_prefix = current_templates->start->name;
4762 		  else if (current_templates->start->cpu_flags.bitfield.cpumpx)
4763 		    i.bnd_prefix = current_templates->start->name;
4764 		  else
4765 		    i.rep_prefix = current_templates->start->name;
4766 		  break;
4767 		default:
4768 		  break;
4769 		}
4770 	    }
4771 	  /* Skip past PREFIX_SEPARATOR and reset token_start.  */
4772 	  token_start = ++l;
4773 	}
4774       else
4775 	break;
4776     }
4777 
4778   if (!current_templates)
4779     {
4780       /* Deprecated functionality (new code should use pseudo-prefixes instead):
4781 	 Check if we should swap operand or force 32bit displacement in
4782 	 encoding.
	 The suffixes recognized are ".s" (swap operands), ".d8" (8-bit
	 displacement) and ".d32" (32-bit displacement); the suffix is cut
	 off at the '.' and the lookup is retried.  If none of them is
	 present, control jumps straight to the size suffix handling.  */
4783       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
4784 	i.dir_encoding = dir_encoding_swap;
4785       else if (mnem_p - 3 == dot_p
4786 	       && dot_p[1] == 'd'
4787 	       && dot_p[2] == '8')
4788 	i.disp_encoding = disp_encoding_8bit;
4789       else if (mnem_p - 4 == dot_p
4790 	       && dot_p[1] == 'd'
4791 	       && dot_p[2] == '3'
4792 	       && dot_p[3] == '2')
4793 	i.disp_encoding = disp_encoding_32bit;
4794       else
4795 	goto check_suffix;
4796       mnem_p = dot_p;
4797       *dot_p = '\0';
4798       current_templates = (const templates *) hash_find (op_hash, mnemonic);
4799     }
4800 
4801   if (!current_templates)
4802     {
4803 check_suffix:
4804       if (mnem_p > mnemonic)
4805 	{
4806 	  /* See if we can get a match by trimming off a suffix.  */
	  /* Note the unusual layout of the first group: the BYTE and QWORD
	     case labels sit inside the `else' arm of the WORD case, so all
	     three share the i.suffix assignment, and the statements that
	     strip the suffix and redo the lookup run for every one of
	     them, including the Intel syntax WORD case that selected
	     SHORT_MNEM_SUFFIX.  */
4807 	  switch (mnem_p[-1])
4808 	    {
4809 	    case WORD_MNEM_SUFFIX:
4810 	      if (intel_syntax && (intel_float_operand (mnemonic) & 2))
4811 		i.suffix = SHORT_MNEM_SUFFIX;
4812 	      else
4813 		/* Fall through.  */
4814 	      case BYTE_MNEM_SUFFIX:
4815 	      case QWORD_MNEM_SUFFIX:
4816 		i.suffix = mnem_p[-1];
4817 	      mnem_p[-1] = '\0';
4818 	      current_templates = (const templates *) hash_find (op_hash,
4819 								 mnemonic);
4820 	      break;
4821 	    case SHORT_MNEM_SUFFIX:
4822 	    case LONG_MNEM_SUFFIX:
4823 	      if (!intel_syntax)
4824 		{
4825 		  i.suffix = mnem_p[-1];
4826 		  mnem_p[-1] = '\0';
4827 		  current_templates = (const templates *) hash_find (op_hash,
4828 								     mnemonic);
4829 		}
4830 	      break;
4831 
4832 	      /* Intel Syntax.  */
4833 	    case 'd':
4834 	      if (intel_syntax)
4835 		{
4836 		  if (intel_float_operand (mnemonic) == 1)
4837 		    i.suffix = SHORT_MNEM_SUFFIX;
4838 		  else
4839 		    i.suffix = LONG_MNEM_SUFFIX;
4840 		  mnem_p[-1] = '\0';
4841 		  current_templates = (const templates *) hash_find (op_hash,
4842 								     mnemonic);
4843 		}
4844 	      break;
4845 	    }
4846 	}
4847 
4848       if (!current_templates)
4849 	{
4850 	  as_bad (_("no such instruction: `%s'"), token_start);
4851 	  return NULL;
4852 	}
4853     }
4854 
4855   if (current_templates->start->opcode_modifier.jump == JUMP
4856       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
4857     {
4858       /* Check for a branch hint.  We allow ",pt" and ",pn" for
4859 	 predict taken and predict not taken respectively.
4860 	 I'm not sure that branch hints actually do anything on loop
4861 	 and jcxz insns (JumpByte) for current Pentium4 chips.  They
4862 	 may work in the future and it doesn't hurt to accept them
4863 	 now.  */
4864       if (l[0] == ',' && l[1] == 'p')
4865 	{
4866 	  if (l[2] == 't')
4867 	    {
4868 	      if (!add_prefix (DS_PREFIX_OPCODE))
4869 		return NULL;
4870 	      l += 3;
4871 	    }
4872 	  else if (l[2] == 'n')
4873 	    {
4874 	      if (!add_prefix (CS_PREFIX_OPCODE))
4875 		return NULL;
4876 	      l += 3;
4877 	    }
4878 	}
4879     }
4880   /* Any other comma loses.  */
4881   if (*l == ',')
4882     {
4883       as_bad (_("invalid character %s in mnemonic"),
4884 	      output_invalid (*l));
4885       return NULL;
4886     }
4887 
4888   /* Check if instruction is supported on specified architecture.  */
  /* The cpu_flags_match results are OR-ed together across the templates;
     the function returns successfully as soon as the accumulated value
     equals CPU_FLAGS_PERFECT_MATCH.  */
4889   supported = 0;
4890   for (t = current_templates->start; t < current_templates->end; ++t)
4891     {
4892       supported |= cpu_flags_match (t);
4893       if (supported == CPU_FLAGS_PERFECT_MATCH)
4894 	{
4895 	  if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT))
4896 	    as_warn (_("use .code16 to ensure correct addressing mode"));
4897 
4898 	  return l;
4899 	}
4900     }
4901 
  /* No perfect match: report either a 64-bit mode mismatch or an
     unsupported architecture, depending on the accumulated bits.  */
4902   if (!(supported & CPU_FLAGS_64BIT_MATCH))
4903     as_bad (flag_code == CODE_64BIT
4904 	    ? _("`%s' is not supported in 64-bit mode")
4905 	    : _("`%s' is only supported in 64-bit mode"),
4906 	    current_templates->start->name);
4907   else
4908     as_bad (_("`%s' is not supported on `%s%s'"),
4909 	    current_templates->start->name,
4910 	    cpu_arch_name ? cpu_arch_name : default_arch,
4911 	    cpu_sub_arch_name ? cpu_sub_arch_name : "");
4912 
4913   return NULL;
4914 }
4915 
4916 static char *
4917 parse_operands (char *l, const char *mnemonic)
4918 {
4919   char *token_start;
4920 
4921   /* 1 if operand is pending after ','.  */
4922   unsigned int expecting_operand = 0;
4923 
4924   /* Non-zero if operand parens not balanced.  */
4925   unsigned int paren_not_balanced;
4926 
4927   while (*l != END_OF_INSN)
4928     {
4929       /* Skip optional white space before operand.  */
4930       if (is_space_char (*l))
4931 	++l;
4932       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
4933 	{
4934 	  as_bad (_("invalid character %s before operand %d"),
4935 		  output_invalid (*l),
4936 		  i.operands + 1);
4937 	  return NULL;
4938 	}
4939       token_start = l;	/* After white space.  */
4940       paren_not_balanced = 0;
4941       while (paren_not_balanced || *l != ',')
4942 	{
4943 	  if (*l == END_OF_INSN)
4944 	    {
4945 	      if (paren_not_balanced)
4946 		{
4947 		  if (!intel_syntax)
4948 		    as_bad (_("unbalanced parenthesis in operand %d."),
4949 			    i.operands + 1);
4950 		  else
4951 		    as_bad (_("unbalanced brackets in operand %d."),
4952 			    i.operands + 1);
4953 		  return NULL;
4954 		}
4955 	      else
4956 		break;	/* we are done */
4957 	    }
4958 	  else if (!is_operand_char (*l) && !is_space_char (*l) && *l != '"')
4959 	    {
4960 	      as_bad (_("invalid character %s in operand %d"),
4961 		      output_invalid (*l),
4962 		      i.operands + 1);
4963 	      return NULL;
4964 	    }
4965 	  if (!intel_syntax)
4966 	    {
4967 	      if (*l == '(')
4968 		++paren_not_balanced;
4969 	      if (*l == ')')
4970 		--paren_not_balanced;
4971 	    }
4972 	  else
4973 	    {
4974 	      if (*l == '[')
4975 		++paren_not_balanced;
4976 	      if (*l == ']')
4977 		--paren_not_balanced;
4978 	    }
4979 	  l++;
4980 	}
4981       if (l != token_start)
4982 	{			/* Yes, we've read in another operand.  */
4983 	  unsigned int operand_ok;
4984 	  this_operand = i.operands++;
4985 	  if (i.operands > MAX_OPERANDS)
4986 	    {
4987 	      as_bad (_("spurious operands; (%d operands/instruction max)"),
4988 		      MAX_OPERANDS);
4989 	      return NULL;
4990 	    }
4991 	  i.types[this_operand].bitfield.unspecified = 1;
4992 	  /* Now parse operand adding info to 'i' as we go along.  */
4993 	  END_STRING_AND_SAVE (l);
4994 
4995 	  if (i.mem_operands > 1)
4996 	    {
4997 	      as_bad (_("too many memory references for `%s'"),
4998 		      mnemonic);
4999 	      return 0;
5000 	    }
5001 
5002 	  if (intel_syntax)
5003 	    operand_ok =
5004 	      i386_intel_operand (token_start,
5005 				  intel_float_operand (mnemonic));
5006 	  else
5007 	    operand_ok = i386_att_operand (token_start);
5008 
5009 	  RESTORE_END_STRING (l);
5010 	  if (!operand_ok)
5011 	    return NULL;
5012 	}
5013       else
5014 	{
5015 	  if (expecting_operand)
5016 	    {
5017 	    expecting_operand_after_comma:
5018 	      as_bad (_("expecting operand after ','; got nothing"));
5019 	      return NULL;
5020 	    }
5021 	  if (*l == ',')
5022 	    {
5023 	      as_bad (_("expecting operand before ','; got nothing"));
5024 	      return NULL;
5025 	    }
5026 	}
5027 
5028       /* Now *l must be either ',' or END_OF_INSN.  */
5029       if (*l == ',')
5030 	{
5031 	  if (*++l == END_OF_INSN)
5032 	    {
5033 	      /* Just skip it, if it's \n complain.  */
5034 	      goto expecting_operand_after_comma;
5035 	    }
5036 	  expecting_operand = 1;
5037 	}
5038     }
5039   return l;
5040 }
5041 
5042 static void
5043 swap_2_operands (int xchg1, int xchg2)
5044 {
5045   union i386_op temp_op;
5046   i386_operand_type temp_type;
5047   unsigned int temp_flags;
5048   enum bfd_reloc_code_real temp_reloc;
5049 
5050   temp_type = i.types[xchg2];
5051   i.types[xchg2] = i.types[xchg1];
5052   i.types[xchg1] = temp_type;
5053 
5054   temp_flags = i.flags[xchg2];
5055   i.flags[xchg2] = i.flags[xchg1];
5056   i.flags[xchg1] = temp_flags;
5057 
5058   temp_op = i.op[xchg2];
5059   i.op[xchg2] = i.op[xchg1];
5060   i.op[xchg1] = temp_op;
5061 
5062   temp_reloc = i.reloc[xchg2];
5063   i.reloc[xchg2] = i.reloc[xchg1];
5064   i.reloc[xchg1] = temp_reloc;
5065 
5066   if (i.mask)
5067     {
5068       if (i.mask->operand == xchg1)
5069 	i.mask->operand = xchg2;
5070       else if (i.mask->operand == xchg2)
5071 	i.mask->operand = xchg1;
5072     }
5073   if (i.broadcast)
5074     {
5075       if (i.broadcast->operand == xchg1)
5076 	i.broadcast->operand = xchg2;
5077       else if (i.broadcast->operand == xchg2)
5078 	i.broadcast->operand = xchg1;
5079     }
5080   if (i.rounding)
5081     {
5082       if (i.rounding->operand == xchg1)
5083 	i.rounding->operand = xchg2;
5084       else if (i.rounding->operand == xchg2)
5085 	i.rounding->operand = xchg1;
5086     }
5087 }
5088 
5089 static void
5090 swap_operands (void)
5091 {
5092   switch (i.operands)
5093     {
5094     case 5:
5095     case 4:
5096       swap_2_operands (1, i.operands - 2);
5097       /* Fall through.  */
5098     case 3:
5099     case 2:
5100       swap_2_operands (0, i.operands - 1);
5101       break;
5102     default:
5103       abort ();
5104     }
5105 
5106   if (i.mem_operands == 2)
5107     {
5108       const seg_entry *temp_seg;
5109       temp_seg = i.seg[0];
5110       i.seg[0] = i.seg[1];
5111       i.seg[1] = temp_seg;
5112     }
5113 }
5114 
5115 /* Try to ensure constant immediates are represented in the smallest
5116    opcode possible.  */
5117 static void
5118 optimize_imm (void)
5119 {
5120   char guess_suffix = 0;
5121   int op;
5122 
5123   if (i.suffix)
5124     guess_suffix = i.suffix;
5125   else if (i.reg_operands)
5126     {
5127       /* Figure out a suffix from the last register operand specified.
5128 	 We can't do this properly yet, i.e. excluding special register
5129 	 instances, but the following works for instructions with
5130 	 immediates.  In any case, we can't set i.suffix yet.  */
5131       for (op = i.operands; --op >= 0;)
5132 	if (i.types[op].bitfield.class != Reg)
5133 	  continue;
5134 	else if (i.types[op].bitfield.byte)
5135 	  {
5136 	    guess_suffix = BYTE_MNEM_SUFFIX;
5137 	    break;
5138 	  }
5139 	else if (i.types[op].bitfield.word)
5140 	  {
5141 	    guess_suffix = WORD_MNEM_SUFFIX;
5142 	    break;
5143 	  }
5144 	else if (i.types[op].bitfield.dword)
5145 	  {
5146 	    guess_suffix = LONG_MNEM_SUFFIX;
5147 	    break;
5148 	  }
5149 	else if (i.types[op].bitfield.qword)
5150 	  {
5151 	    guess_suffix = QWORD_MNEM_SUFFIX;
5152 	    break;
5153 	  }
5154     }
5155   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5156     guess_suffix = WORD_MNEM_SUFFIX;
5157 
5158   for (op = i.operands; --op >= 0;)
5159     if (operand_type_check (i.types[op], imm))
5160       {
5161 	switch (i.op[op].imms->X_op)
5162 	  {
5163 	  case O_constant:
5164 	    /* If a suffix is given, this operand may be shortened.  */
5165 	    switch (guess_suffix)
5166 	      {
5167 	      case LONG_MNEM_SUFFIX:
5168 		i.types[op].bitfield.imm32 = 1;
5169 		i.types[op].bitfield.imm64 = 1;
5170 		break;
5171 	      case WORD_MNEM_SUFFIX:
5172 		i.types[op].bitfield.imm16 = 1;
5173 		i.types[op].bitfield.imm32 = 1;
5174 		i.types[op].bitfield.imm32s = 1;
5175 		i.types[op].bitfield.imm64 = 1;
5176 		break;
5177 	      case BYTE_MNEM_SUFFIX:
5178 		i.types[op].bitfield.imm8 = 1;
5179 		i.types[op].bitfield.imm8s = 1;
5180 		i.types[op].bitfield.imm16 = 1;
5181 		i.types[op].bitfield.imm32 = 1;
5182 		i.types[op].bitfield.imm32s = 1;
5183 		i.types[op].bitfield.imm64 = 1;
5184 		break;
5185 	      }
5186 
5187 	    /* If this operand is at most 16 bits, convert it
5188 	       to a signed 16 bit number before trying to see
5189 	       whether it will fit in an even smaller size.
5190 	       This allows a 16-bit operand such as $0xffe0 to
5191 	       be recognised as within Imm8S range.  */
5192 	    if ((i.types[op].bitfield.imm16)
5193 		&& (i.op[op].imms->X_add_number & ~(offsetT) 0xffff) == 0)
5194 	      {
5195 		i.op[op].imms->X_add_number =
5196 		  (((i.op[op].imms->X_add_number & 0xffff) ^ 0x8000) - 0x8000);
5197 	      }
5198 #ifdef BFD64
5199 	    /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
5200 	    if ((i.types[op].bitfield.imm32)
5201 		&& ((i.op[op].imms->X_add_number & ~(((offsetT) 2 << 31) - 1))
5202 		    == 0))
5203 	      {
5204 		i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5205 						^ ((offsetT) 1 << 31))
5206 					       - ((offsetT) 1 << 31));
5207 	      }
5208 #endif
5209 	    i.types[op]
5210 	      = operand_type_or (i.types[op],
5211 				 smallest_imm_type (i.op[op].imms->X_add_number));
5212 
5213 	    /* We must avoid matching of Imm32 templates when 64bit
5214 	       only immediate is available.  */
5215 	    if (guess_suffix == QWORD_MNEM_SUFFIX)
5216 	      i.types[op].bitfield.imm32 = 0;
5217 	    break;
5218 
5219 	  case O_absent:
5220 	  case O_register:
5221 	    abort ();
5222 
5223 	    /* Symbols and expressions.  */
5224 	  default:
5225 	    /* Convert symbolic operand to proper sizes for matching, but don't
5226 	       prevent matching a set of insns that only supports sizes other
5227 	       than those matching the insn suffix.  */
5228 	    {
5229 	      i386_operand_type mask, allowed;
5230 	      const insn_template *t;
5231 
5232 	      operand_type_set (&mask, 0);
5233 	      operand_type_set (&allowed, 0);
5234 
5235 	      for (t = current_templates->start;
5236 		   t < current_templates->end;
5237 		   ++t)
5238 		{
5239 		  allowed = operand_type_or (allowed, t->operand_types[op]);
5240 		  allowed = operand_type_and (allowed, anyimm);
5241 		}
5242 	      switch (guess_suffix)
5243 		{
5244 		case QWORD_MNEM_SUFFIX:
5245 		  mask.bitfield.imm64 = 1;
5246 		  mask.bitfield.imm32s = 1;
5247 		  break;
5248 		case LONG_MNEM_SUFFIX:
5249 		  mask.bitfield.imm32 = 1;
5250 		  break;
5251 		case WORD_MNEM_SUFFIX:
5252 		  mask.bitfield.imm16 = 1;
5253 		  break;
5254 		case BYTE_MNEM_SUFFIX:
5255 		  mask.bitfield.imm8 = 1;
5256 		  break;
5257 		default:
5258 		  break;
5259 		}
5260 	      allowed = operand_type_and (mask, allowed);
5261 	      if (!operand_type_all_zero (&allowed))
5262 		i.types[op] = operand_type_and (i.types[op], mask);
5263 	    }
5264 	    break;
5265 	  }
5266       }
5267 }
5268 
5269 /* Try to use the smallest displacement type too.  */
5270 static void
5271 optimize_disp (void)
5272 {
5273   int op;
5274 
5275   for (op = i.operands; --op >= 0;)
5276     if (operand_type_check (i.types[op], disp))
5277       {
5278 	if (i.op[op].disps->X_op == O_constant)
5279 	  {
5280 	    offsetT op_disp = i.op[op].disps->X_add_number;
5281 
5282 	    if (i.types[op].bitfield.disp16
5283 		&& (op_disp & ~(offsetT) 0xffff) == 0)
5284 	      {
5285 		/* If this operand is at most 16 bits, convert
5286 		   to a signed 16 bit number and don't use 64bit
5287 		   displacement.  */
5288 		op_disp = (((op_disp & 0xffff) ^ 0x8000) - 0x8000);
5289 		i.types[op].bitfield.disp64 = 0;
5290 	      }
5291 #ifdef BFD64
5292 	    /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
5293 	    if (i.types[op].bitfield.disp32
5294 		&& (op_disp & ~(((offsetT) 2 << 31) - 1)) == 0)
5295 	      {
5296 		/* If this operand is at most 32 bits, convert
5297 		   to a signed 32 bit number and don't use 64bit
5298 		   displacement.  */
5299 		op_disp &= (((offsetT) 2 << 31) - 1);
5300 		op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5301 		i.types[op].bitfield.disp64 = 0;
5302 	      }
5303 #endif
5304 	    if (!op_disp && i.types[op].bitfield.baseindex)
5305 	      {
5306 		i.types[op].bitfield.disp8 = 0;
5307 		i.types[op].bitfield.disp16 = 0;
5308 		i.types[op].bitfield.disp32 = 0;
5309 		i.types[op].bitfield.disp32s = 0;
5310 		i.types[op].bitfield.disp64 = 0;
5311 		i.op[op].disps = 0;
5312 		i.disp_operands--;
5313 	      }
5314 	    else if (flag_code == CODE_64BIT)
5315 	      {
5316 		if (fits_in_signed_long (op_disp))
5317 		  {
5318 		    i.types[op].bitfield.disp64 = 0;
5319 		    i.types[op].bitfield.disp32s = 1;
5320 		  }
5321 		if (i.prefix[ADDR_PREFIX]
5322 		    && fits_in_unsigned_long (op_disp))
5323 		  i.types[op].bitfield.disp32 = 1;
5324 	      }
5325 	    if ((i.types[op].bitfield.disp32
5326 		 || i.types[op].bitfield.disp32s
5327 		 || i.types[op].bitfield.disp16)
5328 		&& fits_in_disp8 (op_disp))
5329 	      i.types[op].bitfield.disp8 = 1;
5330 	  }
5331 	else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5332 		 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5333 	  {
5334 	    fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5335 			 i.op[op].disps, 0, i.reloc[op]);
5336 	    i.types[op].bitfield.disp8 = 0;
5337 	    i.types[op].bitfield.disp16 = 0;
5338 	    i.types[op].bitfield.disp32 = 0;
5339 	    i.types[op].bitfield.disp32s = 0;
5340 	    i.types[op].bitfield.disp64 = 0;
5341 	  }
5342  	else
5343 	  /* We only support 64bit displacement on constants.  */
5344 	  i.types[op].bitfield.disp64 = 0;
5345       }
5346 }
5347 
5348 /* Return 1 if there is a match in broadcast bytes between operand
5349    GIVEN and instruction template T.   */
5350 
5351 static INLINE int
5352 match_broadcast_size (const insn_template *t, unsigned int given)
5353 {
5354   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5355 	   && i.types[given].bitfield.byte)
5356 	  || (t->opcode_modifier.broadcast == WORD_BROADCAST
5357 	      && i.types[given].bitfield.word)
5358 	  || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5359 	      && i.types[given].bitfield.dword)
5360 	  || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5361 	      && i.types[given].bitfield.qword));
5362 }
5363 
/* Check if operands are valid for the instruction.

   This validates the vector-related aspects of the operands recorded
   in `i' against instruction template T: operand vector size versus
   AVX512VL availability, vector index registers / VSIB addressing,
   default mask use, broadcast, masking kind and position, RC/SAE, and
   finally the Disp8 scaling.

   Returns 0 if the operands are acceptable and 1 otherwise; on failure
   i.error is set to the reason.  Side effects on success paths:
   i.broadcast->bytes is filled in when a broadcast is present,
   i.memshift is set (and left non-zero only when a constant
   displacement fits the scaled Disp8), and the disp8 type bit of
   constant displacement operands may be set or cleared.  */

static int
check_VecOperands (const insn_template *t)
{
  unsigned int op;
  i386_cpu_flags cpu;
  static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

  /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
     any one operand are implicitly requiring AVX512VL support if the actual
     operand size is YMMword or XMMword.  Since this function runs after
     template matching, there's no need to check for YMMword/XMMword in
     the template.  */
  cpu = cpu_flags_and (t->cpu_flags, avx512);
  if (!cpu_flags_all_zero (&cpu)
      && !t->cpu_flags.bitfield.cpuavx512vl
      && !cpu_arch_flags.bitfield.cpuavx512vl)
    {
      for (op = 0; op < t->operands; ++op)
	{
	  if (t->operand_types[op].bitfield.zmmword
	      && (i.types[op].bitfield.ymmword
		  || i.types[op].bitfield.xmmword))
	    {
	      i.error = unsupported;
	      return 1;
	    }
	}
    }

  /* Without VSIB byte, we can't have a vector register for index.  */
  if (!t->opcode_modifier.vecsib
      && i.index_reg
      && (i.index_reg->reg_type.bitfield.xmmword
	  || i.index_reg->reg_type.bitfield.ymmword
	  || i.index_reg->reg_type.bitfield.zmmword))
    {
      i.error = unsupported_vector_index_register;
      return 1;
    }

  /* Check if default mask is allowed.  A missing mask and mask register
     number 0 are both rejected when the template sets nodefmask.  */
  if (t->opcode_modifier.nodefmask
      && (!i.mask || i.mask->mask->reg_num == 0))
    {
      i.error = no_default_mask;
      return 1;
    }

  /* For VSIB byte, we need a vector register for index, and all vector
     registers must be distinct.  */
  if (t->opcode_modifier.vecsib)
    {
      /* The index register's size must be the one the template's VecSIB
	 kind calls for.  */
      if (!i.index_reg
	  || !((t->opcode_modifier.vecsib == VecSIB128
		&& i.index_reg->reg_type.bitfield.xmmword)
	       || (t->opcode_modifier.vecsib == VecSIB256
		   && i.index_reg->reg_type.bitfield.ymmword)
	       || (t->opcode_modifier.vecsib == VecSIB512
		   && i.index_reg->reg_type.bitfield.zmmword)))
      {
	i.error = invalid_vsib_address;
	return 1;
      }

      gas_assert (i.reg_operands == 2 || i.mask);
      if (i.reg_operands == 2 && !i.mask)
	{
	  /* Two register operands and no mask operand: operands 0 and 2
	     are expected to be XMM/YMM registers.  */
	  gas_assert (i.types[0].bitfield.class == RegSIMD);
	  gas_assert (i.types[0].bitfield.xmmword
		      || i.types[0].bitfield.ymmword);
	  gas_assert (i.types[2].bitfield.class == RegSIMD);
	  gas_assert (i.types[2].bitfield.xmmword
		      || i.types[2].bitfield.ymmword);
	  /* NB: both `return 0' statements below report success at once,
	     so none of the checks further down in this function run.  */
	  if (operand_check == check_none)
	    return 0;
	  if (register_number (i.op[0].regs)
	      != register_number (i.index_reg)
	      && register_number (i.op[2].regs)
		 != register_number (i.index_reg)
	      && register_number (i.op[0].regs)
		 != register_number (i.op[2].regs))
	    return 0;
	  /* Some of the three registers coincide: an error or only a
	     warning, depending on operand_check.  */
	  if (operand_check == check_error)
	    {
	      i.error = invalid_vector_register_set;
	      return 1;
	    }
	  as_warn (_("mask, index, and destination registers should be distinct"));
	}
      else if (i.reg_operands == 1 && i.mask)
	{
	  /* One register operand plus a mask: operand 1, when it is a
	     vector register, must differ from the index register.  */
	  if (i.types[1].bitfield.class == RegSIMD
	      && (i.types[1].bitfield.xmmword
	          || i.types[1].bitfield.ymmword
	          || i.types[1].bitfield.zmmword)
	      && (register_number (i.op[1].regs)
		  == register_number (i.index_reg)))
	    {
	      if (operand_check == check_error)
		{
		  i.error = invalid_vector_register_set;
		  return 1;
		}
	      if (operand_check != check_none)
		as_warn (_("index and destination registers should be distinct"));
	    }
	}
    }

  /* Check if broadcast is supported by the instruction and is applied
     to the memory operand.  */
  if (i.broadcast)
    {
      i386_operand_type type, overlap;

      /* Check if specified broadcast is supported in this instruction,
	 and its broadcast bytes match the memory operand.  */
      op = i.broadcast->operand;
      if (!t->opcode_modifier.broadcast
	  || !(i.flags[op] & Operand_Mem)
	  || (!i.types[op].bitfield.unspecified
	      && !match_broadcast_size (t, op)))
	{
	  /* Shared failure exit for all broadcast checks in this block.  */
	bad_broadcast:
	  i.error = unsupported_broadcast;
	  return 1;
	}

      /* Total size covered by the broadcast: 2^(broadcast - 1) scaled by
	 i.broadcast->type (presumably the element size in bytes times
	 the element count -- confirm against the definitions).  */
      i.broadcast->bytes = ((1 << (t->opcode_modifier.broadcast - 1))
			    * i.broadcast->type);
      /* Express that total size as an operand size bit.  */
      operand_type_set (&type, 0);
      switch (i.broadcast->bytes)
	{
	case 2:
	  type.bitfield.word = 1;
	  break;
	case 4:
	  type.bitfield.dword = 1;
	  break;
	case 8:
	  type.bitfield.qword = 1;
	  break;
	case 16:
	  type.bitfield.xmmword = 1;
	  break;
	case 32:
	  type.bitfield.ymmword = 1;
	  break;
	case 64:
	  type.bitfield.zmmword = 1;
	  break;
	default:
	  goto bad_broadcast;
	}

      /* The template must accept an operand of that size in this
	 position.  */
      overlap = operand_type_and (type, t->operand_types[op]);
      if (operand_type_all_zero (&overlap))
	  goto bad_broadcast;

      if (t->opcode_modifier.checkregsize)
	{
	  unsigned int j;

	  /* Compare every other operand against the broadcast operand,
	     using the synthesized size in place of its parsed type.  */
	  type.bitfield.baseindex = 1;
	  for (j = 0; j < i.operands; ++j)
	    {
	      if (j != op
		  && !operand_type_register_match(i.types[j],
						  t->operand_types[j],
						  type,
						  t->operand_types[op]))
		goto bad_broadcast;
	    }
	}
    }
  /* If broadcast is supported in this instruction, we need to check if
     operand of one-element size isn't specified without broadcast.  */
  else if (t->opcode_modifier.broadcast && i.mem_operands)
    {
      /* Find memory operand.  */
      for (op = 0; op < i.operands; op++)
	if (i.flags[op] & Operand_Mem)
	  break;
      gas_assert (op < i.operands);
      /* Check size of the memory operand.  */
      if (match_broadcast_size (t, op))
	{
	  i.error = broadcast_needed;
	  return 1;
	}
    }
  else
    op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */

  /* Check if requested masking is supported.  */
  if (i.mask)
    {
      switch (t->opcode_modifier.masking)
	{
	case BOTH_MASKING:
	  break;
	case MERGING_MASKING:
	  if (i.mask->zeroing)
	    {
	  /* A masking value of 0 jumps into this block, so it shares the
	     error exit with zeroing-masking on a merging-only template.  */
	case 0:
	      i.error = unsupported_masking;
	      return 1;
	    }
	  break;
	case DYNAMIC_MASKING:
	  /* Memory destinations allow only merging masking.  */
	  if (i.mask->zeroing && i.mem_operands)
	    {
	      /* Find memory operand.  */
	      for (op = 0; op < i.operands; op++)
		if (i.flags[op] & Operand_Mem)
		  break;
	      gas_assert (op < i.operands);
	      /* The memory operand is the last operand.  */
	      if (op == i.operands - 1)
		{
		  i.error = unsupported_masking;
		  return 1;
		}
	    }
	  break;
	default:
	  abort ();
	}
    }

  /* Check if masking is applied to dest operand.  */
  if (i.mask && (i.mask->operand != (int) (i.operands - 1)))
    {
      i.error = mask_not_on_destination;
      return 1;
    }

  /* Check RC/SAE.  */
  if (i.rounding)
    {
      /* The template must allow SAE, and additionally static rounding
	 unless the request is SAE only.  */
      if (!t->opcode_modifier.sae
	  || (i.rounding->type != saeonly && !t->opcode_modifier.staticrounding))
	{
	  i.error = unsupported_rc_sae;
	  return 1;
	}
      /* If the instruction has several immediate operands and one of
	 them is rounding, the rounding operand should be the last
	 immediate operand.  */
      if (i.imm_operands > 1
	  && i.rounding->operand != (int) (i.imm_operands - 1))
	{
	  i.error = rc_sae_operand_not_last_imm;
	  return 1;
	}
    }

  /* Check vector Disp8 operand.  */
  if (t->opcode_modifier.disp8memshift
      && i.disp_encoding != disp_encoding_32bit)
    {
      /* Determine i.memshift, which fits_in_disp8 () consults.  */
      if (i.broadcast)
	i.memshift = t->opcode_modifier.broadcast - 1;
      else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
	i.memshift = t->opcode_modifier.disp8memshift;
      else
	{
	  /* The shift depends on the vector length: take it from the
	     memory operand's size when that is known, else from the
	     largest vector register operand.  */
	  const i386_operand_type *type = NULL;

	  i.memshift = 0;
	  for (op = 0; op < i.operands; op++)
	    if (i.flags[op] & Operand_Mem)
	      {
		if (t->opcode_modifier.evex == EVEXLIG)
		  i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
		else if (t->operand_types[op].bitfield.xmmword
			 + t->operand_types[op].bitfield.ymmword
			 + t->operand_types[op].bitfield.zmmword <= 1)
		  /* The template permits at most one vector size here.  */
		  type = &t->operand_types[op];
		else if (!i.types[op].bitfield.unspecified)
		  /* Otherwise use the size given with the operand.  */
		  type = &i.types[op];
	      }
	    else if (i.types[op].bitfield.class == RegSIMD
		     && t->opcode_modifier.evex != EVEXLIG)
	      {
		/* Track the largest vector register seen so far.  */
		if (i.types[op].bitfield.zmmword)
		  i.memshift = 6;
		else if (i.types[op].bitfield.ymmword && i.memshift < 5)
		  i.memshift = 5;
		else if (i.types[op].bitfield.xmmword && i.memshift < 4)
		  i.memshift = 4;
	      }

	  /* A size found for the memory operand overrides the value
	     derived from the registers.  */
	  if (type)
	    {
	      if (type->bitfield.zmmword)
		i.memshift = 6;
	      else if (type->bitfield.ymmword)
		i.memshift = 5;
	      else if (type->bitfield.xmmword)
		i.memshift = 4;
	    }

	  /* For the check in fits_in_disp8().  */
	  if (i.memshift == 0)
	    i.memshift = -1;
	}

      for (op = 0; op < i.operands; op++)
	if (operand_type_check (i.types[op], disp)
	    && i.op[op].disps->X_op == O_constant)
	  {
	    if (fits_in_disp8 (i.op[op].disps->X_add_number))
	      {
		/* Return with i.memshift still set; the reset below is
		   deliberately not reached on this path.  */
		i.types[op].bitfield.disp8 = 1;
		return 0;
	      }
	    i.types[op].bitfield.disp8 = 0;
	  }
    }

  /* No constant displacement fits the scaled Disp8 (or none applies).  */
  i.memshift = 0;

  return 0;
}
5691 
5692 /* Check if operands are valid for the instruction.  Update VEX
5693    operand types.  */
5694 
5695 static int
5696 VEX_check_operands (const insn_template *t)
5697 {
5698   if (i.vec_encoding == vex_encoding_evex)
5699     {
5700       /* This instruction must be encoded with EVEX prefix.  */
5701       if (!is_evex_encoding (t))
5702 	{
5703 	  i.error = unsupported;
5704 	  return 1;
5705 	}
5706       return 0;
5707     }
5708 
5709   if (!t->opcode_modifier.vex)
5710     {
5711       /* This instruction template doesn't have VEX prefix.  */
5712       if (i.vec_encoding != vex_encoding_default)
5713 	{
5714 	  i.error = unsupported;
5715 	  return 1;
5716 	}
5717       return 0;
5718     }
5719 
5720   /* Check the special Imm4 cases; must be the first operand.  */
5721   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
5722     {
5723       if (i.op[0].imms->X_op != O_constant
5724 	  || !fits_in_imm4 (i.op[0].imms->X_add_number))
5725 	{
5726 	  i.error = bad_imm4;
5727 	  return 1;
5728 	}
5729 
5730       /* Turn off Imm<N> so that update_imm won't complain.  */
5731       operand_type_set (&i.types[0], 0);
5732     }
5733 
5734   return 0;
5735 }
5736 
5737 static const insn_template *
5738 match_template (char mnem_suffix)
5739 {
5740   /* Points to template once we've found it.  */
5741   const insn_template *t;
5742   i386_operand_type overlap0, overlap1, overlap2, overlap3;
5743   i386_operand_type overlap4;
5744   unsigned int found_reverse_match;
5745   i386_opcode_modifier suffix_check;
5746   i386_operand_type operand_types [MAX_OPERANDS];
5747   int addr_prefix_disp;
5748   unsigned int j, size_match, check_register;
5749   enum i386_error specific_error = 0;
5750 
5751 #if MAX_OPERANDS != 5
5752 # error "MAX_OPERANDS must be 5."
5753 #endif
5754 
5755   found_reverse_match = 0;
5756   addr_prefix_disp = -1;
5757 
5758   /* Prepare for mnemonic suffix check.  */
5759   memset (&suffix_check, 0, sizeof (suffix_check));
5760   switch (mnem_suffix)
5761     {
5762     case BYTE_MNEM_SUFFIX:
5763       suffix_check.no_bsuf = 1;
5764       break;
5765     case WORD_MNEM_SUFFIX:
5766       suffix_check.no_wsuf = 1;
5767       break;
5768     case SHORT_MNEM_SUFFIX:
5769       suffix_check.no_ssuf = 1;
5770       break;
5771     case LONG_MNEM_SUFFIX:
5772       suffix_check.no_lsuf = 1;
5773       break;
5774     case QWORD_MNEM_SUFFIX:
5775       suffix_check.no_qsuf = 1;
5776       break;
5777     default:
5778       /* NB: In Intel syntax, normally we can check for memory operand
5779 	 size when there is no mnemonic suffix.  But jmp and call have
5780 	 2 different encodings with Dword memory operand size, one with
5781 	 No_ldSuf and the other without.  i.suffix is set to
5782 	 LONG_DOUBLE_MNEM_SUFFIX to skip the one with No_ldSuf.  */
5783       if (i.suffix == LONG_DOUBLE_MNEM_SUFFIX)
5784 	suffix_check.no_ldsuf = 1;
5785     }
5786 
5787   /* Must have right number of operands.  */
5788   i.error = number_of_operands_mismatch;
5789 
5790   for (t = current_templates->start; t < current_templates->end; t++)
5791     {
5792       addr_prefix_disp = -1;
5793       found_reverse_match = 0;
5794 
5795       if (i.operands != t->operands)
5796 	continue;
5797 
5798       /* Check processor support.  */
5799       i.error = unsupported;
5800       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
5801 	continue;
5802 
5803       /* Check AT&T mnemonic.   */
5804       i.error = unsupported_with_intel_mnemonic;
5805       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
5806 	continue;
5807 
5808       /* Check AT&T/Intel syntax and Intel64/AMD64 ISA.   */
5809       i.error = unsupported_syntax;
5810       if ((intel_syntax && t->opcode_modifier.attsyntax)
5811 	  || (!intel_syntax && t->opcode_modifier.intelsyntax)
5812 	  || (intel64 && t->opcode_modifier.amd64)
5813 	  || (!intel64 && t->opcode_modifier.intel64))
5814 	continue;
5815 
5816       /* Check the suffix.  */
5817       i.error = invalid_instruction_suffix;
5818       if ((t->opcode_modifier.no_bsuf && suffix_check.no_bsuf)
5819 	  || (t->opcode_modifier.no_wsuf && suffix_check.no_wsuf)
5820 	  || (t->opcode_modifier.no_lsuf && suffix_check.no_lsuf)
5821 	  || (t->opcode_modifier.no_ssuf && suffix_check.no_ssuf)
5822 	  || (t->opcode_modifier.no_qsuf && suffix_check.no_qsuf)
5823 	  || (t->opcode_modifier.no_ldsuf && suffix_check.no_ldsuf))
5824 	continue;
5825 
5826       size_match = operand_size_match (t);
5827       if (!size_match)
5828 	continue;
5829 
5830       /* This is intentionally not
5831 
5832 	 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
5833 
5834 	 as the case of a missing * on the operand is accepted (perhaps with
5835 	 a warning, issued further down).  */
5836       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
5837 	{
5838 	  i.error = operand_type_mismatch;
5839 	  continue;
5840 	}
5841 
5842       for (j = 0; j < MAX_OPERANDS; j++)
5843 	operand_types[j] = t->operand_types[j];
5844 
5845       /* In general, don't allow 64-bit operands in 32-bit mode.  */
5846       if (i.suffix == QWORD_MNEM_SUFFIX
5847 	  && flag_code != CODE_64BIT
5848 	  && (intel_syntax
5849 	      ? (!t->opcode_modifier.ignoresize
5850 	         && !t->opcode_modifier.broadcast
5851 		 && !intel_float_operand (t->name))
5852 	      : intel_float_operand (t->name) != 2)
5853 	  && ((operand_types[0].bitfield.class != RegMMX
5854 	       && operand_types[0].bitfield.class != RegSIMD)
5855 	      || (operand_types[t->operands > 1].bitfield.class != RegMMX
5856 		  && operand_types[t->operands > 1].bitfield.class != RegSIMD))
5857 	  && (t->base_opcode != 0x0fc7
5858 	      || t->extension_opcode != 1 /* cmpxchg8b */))
5859 	continue;
5860 
5861       /* In general, don't allow 32-bit operands on pre-386.  */
5862       else if (i.suffix == LONG_MNEM_SUFFIX
5863 	       && !cpu_arch_flags.bitfield.cpui386
5864 	       && (intel_syntax
5865 		   ? (!t->opcode_modifier.ignoresize
5866 		      && !intel_float_operand (t->name))
5867 		   : intel_float_operand (t->name) != 2)
5868 	       && ((operand_types[0].bitfield.class != RegMMX
5869 		    && operand_types[0].bitfield.class != RegSIMD)
5870 		   || (operand_types[t->operands > 1].bitfield.class != RegMMX
5871 		       && operand_types[t->operands > 1].bitfield.class
5872 			  != RegSIMD)))
5873 	continue;
5874 
5875       /* Do not verify operands when there are none.  */
5876       else
5877 	{
5878 	  if (!t->operands)
5879 	    /* We've found a match; break out of loop.  */
5880 	    break;
5881 	}
5882 
5883       if (!t->opcode_modifier.jump
5884 	  || t->opcode_modifier.jump == JUMP_ABSOLUTE)
5885 	{
5886 	  /* There should be only one Disp operand.  */
5887 	  for (j = 0; j < MAX_OPERANDS; j++)
5888 	    if (operand_type_check (operand_types[j], disp))
5889 	      break;
5890 	  if (j < MAX_OPERANDS)
5891 	    {
5892 	      bfd_boolean override = (i.prefix[ADDR_PREFIX] != 0);
5893 
5894 	      addr_prefix_disp = j;
5895 
5896 	      /* Address size prefix will turn Disp64/Disp32S/Disp32/Disp16
5897 		 operand into Disp32/Disp32/Disp16/Disp32 operand.  */
5898 	      switch (flag_code)
5899 		{
5900 		case CODE_16BIT:
5901 		  override = !override;
5902 		  /* Fall through.  */
5903 		case CODE_32BIT:
5904 		  if (operand_types[j].bitfield.disp32
5905 		      && operand_types[j].bitfield.disp16)
5906 		    {
5907 		      operand_types[j].bitfield.disp16 = override;
5908 		      operand_types[j].bitfield.disp32 = !override;
5909 		    }
5910 		  operand_types[j].bitfield.disp32s = 0;
5911 		  operand_types[j].bitfield.disp64 = 0;
5912 		  break;
5913 
5914 		case CODE_64BIT:
5915 		  if (operand_types[j].bitfield.disp32s
5916 		      || operand_types[j].bitfield.disp64)
5917 		    {
5918 		      operand_types[j].bitfield.disp64 &= !override;
5919 		      operand_types[j].bitfield.disp32s &= !override;
5920 		      operand_types[j].bitfield.disp32 = override;
5921 		    }
5922 		  operand_types[j].bitfield.disp16 = 0;
5923 		  break;
5924 		}
5925 	    }
5926 	}
5927 
5928       /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
5929       if (i.reloc[0] == BFD_RELOC_386_GOT32 && t->base_opcode == 0xa0)
5930 	continue;
5931 
5932       /* We check register size if needed.  */
5933       if (t->opcode_modifier.checkregsize)
5934 	{
5935 	  check_register = (1 << t->operands) - 1;
5936 	  if (i.broadcast)
5937 	    check_register &= ~(1 << i.broadcast->operand);
5938 	}
5939       else
5940 	check_register = 0;
5941 
5942       overlap0 = operand_type_and (i.types[0], operand_types[0]);
5943       switch (t->operands)
5944 	{
5945 	case 1:
5946 	  if (!operand_type_match (overlap0, i.types[0]))
5947 	    continue;
5948 	  break;
5949 	case 2:
5950 	  /* xchg %eax, %eax is a special case. It is an alias for nop
5951 	     only in 32bit mode and we can use opcode 0x90.  In 64bit
5952 	     mode, we can't use 0x90 for xchg %eax, %eax since it should
5953 	     zero-extend %eax to %rax.  */
5954 	  if (flag_code == CODE_64BIT
5955 	      && t->base_opcode == 0x90
5956 	      && i.types[0].bitfield.instance == Accum
5957 	      && i.types[0].bitfield.dword
5958 	      && i.types[1].bitfield.instance == Accum
5959 	      && i.types[1].bitfield.dword)
5960 	    continue;
5961 	  /* xrelease mov %eax, <disp> is another special case. It must not
5962 	     match the accumulator-only encoding of mov.  */
5963 	  if (flag_code != CODE_64BIT
5964 	      && i.hle_prefix
5965 	      && t->base_opcode == 0xa0
5966 	      && i.types[0].bitfield.instance == Accum
5967 	      && (i.flags[1] & Operand_Mem))
5968 	    continue;
5969 	  /* Fall through.  */
5970 
5971 	case 3:
5972 	  if (!(size_match & MATCH_STRAIGHT))
5973 	    goto check_reverse;
5974 	  /* Reverse direction of operands if swapping is possible in the first
5975 	     place (operands need to be symmetric) and
5976 	     - the load form is requested, and the template is a store form,
5977 	     - the store form is requested, and the template is a load form,
5978 	     - the non-default (swapped) form is requested.  */
5979 	  overlap1 = operand_type_and (operand_types[0], operand_types[1]);
5980 	  if (t->opcode_modifier.d && i.reg_operands == i.operands
5981 	      && !operand_type_all_zero (&overlap1))
5982 	    switch (i.dir_encoding)
5983 	      {
5984 	      case dir_encoding_load:
5985 		if (operand_type_check (operand_types[i.operands - 1], anymem)
5986 		    || t->opcode_modifier.regmem)
5987 		  goto check_reverse;
5988 		break;
5989 
5990 	      case dir_encoding_store:
5991 		if (!operand_type_check (operand_types[i.operands - 1], anymem)
5992 		    && !t->opcode_modifier.regmem)
5993 		  goto check_reverse;
5994 		break;
5995 
5996 	      case dir_encoding_swap:
5997 		goto check_reverse;
5998 
5999 	      case dir_encoding_default:
6000 		break;
6001 	      }
6002 	  /* If we want store form, we skip the current load.  */
6003 	  if ((i.dir_encoding == dir_encoding_store
6004 	       || i.dir_encoding == dir_encoding_swap)
6005 	      && i.mem_operands == 0
6006 	      && t->opcode_modifier.load)
6007 	    continue;
6008 	  /* Fall through.  */
6009 	case 4:
6010 	case 5:
6011 	  overlap1 = operand_type_and (i.types[1], operand_types[1]);
6012 	  if (!operand_type_match (overlap0, i.types[0])
6013 	      || !operand_type_match (overlap1, i.types[1])
6014 	      || ((check_register & 3) == 3
6015 		  && !operand_type_register_match (i.types[0],
6016 						   operand_types[0],
6017 						   i.types[1],
6018 						   operand_types[1])))
6019 	    {
6020 	      /* Check if other direction is valid ...  */
6021 	      if (!t->opcode_modifier.d)
6022 		continue;
6023 
6024 check_reverse:
6025 	      if (!(size_match & MATCH_REVERSE))
6026 		continue;
6027 	      /* Try reversing direction of operands.  */
6028 	      overlap0 = operand_type_and (i.types[0], operand_types[i.operands - 1]);
6029 	      overlap1 = operand_type_and (i.types[i.operands - 1], operand_types[0]);
6030 	      if (!operand_type_match (overlap0, i.types[0])
6031 		  || !operand_type_match (overlap1, i.types[i.operands - 1])
6032 		  || (check_register
6033 		      && !operand_type_register_match (i.types[0],
6034 						       operand_types[i.operands - 1],
6035 						       i.types[i.operands - 1],
6036 						       operand_types[0])))
6037 		{
6038 		  /* Does not match either direction.  */
6039 		  continue;
6040 		}
6041 	      /* found_reverse_match holds which of D or FloatR
6042 		 we've found.  */
6043 	      if (!t->opcode_modifier.d)
6044 		found_reverse_match = 0;
6045 	      else if (operand_types[0].bitfield.tbyte)
6046 		found_reverse_match = Opcode_FloatD;
6047 	      else if (operand_types[0].bitfield.xmmword
6048 		       || operand_types[i.operands - 1].bitfield.xmmword
6049 		       || operand_types[0].bitfield.class == RegMMX
6050 		       || operand_types[i.operands - 1].bitfield.class == RegMMX
6051 		       || is_any_vex_encoding(t))
6052 		found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6053 				      ? Opcode_SIMD_FloatD : Opcode_SIMD_IntD;
6054 	      else
6055 		found_reverse_match = Opcode_D;
6056 	      if (t->opcode_modifier.floatr)
6057 		found_reverse_match |= Opcode_FloatR;
6058 	    }
6059 	  else
6060 	    {
6061 	      /* Found a forward 2 operand match here.  */
6062 	      switch (t->operands)
6063 		{
6064 		case 5:
6065 		  overlap4 = operand_type_and (i.types[4],
6066 					       operand_types[4]);
6067 		  /* Fall through.  */
6068 		case 4:
6069 		  overlap3 = operand_type_and (i.types[3],
6070 					       operand_types[3]);
6071 		  /* Fall through.  */
6072 		case 3:
6073 		  overlap2 = operand_type_and (i.types[2],
6074 					       operand_types[2]);
6075 		  break;
6076 		}
6077 
6078 	      switch (t->operands)
6079 		{
6080 		case 5:
6081 		  if (!operand_type_match (overlap4, i.types[4])
6082 		      || !operand_type_register_match (i.types[3],
6083 						       operand_types[3],
6084 						       i.types[4],
6085 						       operand_types[4]))
6086 		    continue;
6087 		  /* Fall through.  */
6088 		case 4:
6089 		  if (!operand_type_match (overlap3, i.types[3])
6090 		      || ((check_register & 0xa) == 0xa
6091 			  && !operand_type_register_match (i.types[1],
6092 							    operand_types[1],
6093 							    i.types[3],
6094 							    operand_types[3]))
6095 		      || ((check_register & 0xc) == 0xc
6096 			  && !operand_type_register_match (i.types[2],
6097 							    operand_types[2],
6098 							    i.types[3],
6099 							    operand_types[3])))
6100 		    continue;
6101 		  /* Fall through.  */
6102 		case 3:
6103 		  /* Here we make use of the fact that there are no
6104 		     reverse match 3 operand instructions.  */
6105 		  if (!operand_type_match (overlap2, i.types[2])
6106 		      || ((check_register & 5) == 5
6107 			  && !operand_type_register_match (i.types[0],
6108 							    operand_types[0],
6109 							    i.types[2],
6110 							    operand_types[2]))
6111 		      || ((check_register & 6) == 6
6112 			  && !operand_type_register_match (i.types[1],
6113 							    operand_types[1],
6114 							    i.types[2],
6115 							    operand_types[2])))
6116 		    continue;
6117 		  break;
6118 		}
6119 	    }
6120 	  /* Found either forward/reverse 2, 3 or 4 operand match here:
6121 	     slip through to break.  */
6122 	}
6123 
6124       /* Check if vector and VEX operands are valid.  */
6125       if (check_VecOperands (t) || VEX_check_operands (t))
6126 	{
6127 	  specific_error = i.error;
6128 	  continue;
6129 	}
6130 
6131       /* We've found a match; break out of loop.  */
6132       break;
6133     }
6134 
6135   if (t == current_templates->end)
6136     {
6137       /* We found no match.  */
6138       const char *err_msg;
6139       switch (specific_error ? specific_error : i.error)
6140 	{
6141 	default:
6142 	  abort ();
6143 	case operand_size_mismatch:
6144 	  err_msg = _("operand size mismatch");
6145 	  break;
6146 	case operand_type_mismatch:
6147 	  err_msg = _("operand type mismatch");
6148 	  break;
6149 	case register_type_mismatch:
6150 	  err_msg = _("register type mismatch");
6151 	  break;
6152 	case number_of_operands_mismatch:
6153 	  err_msg = _("number of operands mismatch");
6154 	  break;
6155 	case invalid_instruction_suffix:
6156 	  err_msg = _("invalid instruction suffix");
6157 	  break;
6158 	case bad_imm4:
6159 	  err_msg = _("constant doesn't fit in 4 bits");
6160 	  break;
6161 	case unsupported_with_intel_mnemonic:
6162 	  err_msg = _("unsupported with Intel mnemonic");
6163 	  break;
6164 	case unsupported_syntax:
6165 	  err_msg = _("unsupported syntax");
6166 	  break;
6167 	case unsupported:
6168 	  as_bad (_("unsupported instruction `%s'"),
6169 		  current_templates->start->name);
6170 	  return NULL;
6171 	case invalid_vsib_address:
6172 	  err_msg = _("invalid VSIB address");
6173 	  break;
6174 	case invalid_vector_register_set:
6175 	  err_msg = _("mask, index, and destination registers must be distinct");
6176 	  break;
6177 	case unsupported_vector_index_register:
6178 	  err_msg = _("unsupported vector index register");
6179 	  break;
6180 	case unsupported_broadcast:
6181 	  err_msg = _("unsupported broadcast");
6182 	  break;
6183 	case broadcast_needed:
6184 	  err_msg = _("broadcast is needed for operand of such type");
6185 	  break;
6186 	case unsupported_masking:
6187 	  err_msg = _("unsupported masking");
6188 	  break;
6189 	case mask_not_on_destination:
6190 	  err_msg = _("mask not on destination operand");
6191 	  break;
6192 	case no_default_mask:
6193 	  err_msg = _("default mask isn't allowed");
6194 	  break;
6195 	case unsupported_rc_sae:
6196 	  err_msg = _("unsupported static rounding/sae");
6197 	  break;
6198 	case rc_sae_operand_not_last_imm:
6199 	  if (intel_syntax)
6200 	    err_msg = _("RC/SAE operand must precede immediate operands");
6201 	  else
6202 	    err_msg = _("RC/SAE operand must follow immediate operands");
6203 	  break;
6204 	case invalid_register_operand:
6205 	  err_msg = _("invalid register operand");
6206 	  break;
6207 	}
6208       as_bad (_("%s for `%s'"), err_msg,
6209 	      current_templates->start->name);
6210       return NULL;
6211     }
6212 
6213   if (!quiet_warnings)
6214     {
6215       if (!intel_syntax
6216 	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6217 	as_warn (_("indirect %s without `*'"), t->name);
6218 
6219       if (t->opcode_modifier.isprefix
6220 	  && t->opcode_modifier.ignoresize)
6221 	{
6222 	  /* Warn them that a data or address size prefix doesn't
6223 	     affect assembly of the next line of code.  */
6224 	  as_warn (_("stand-alone `%s' prefix"), t->name);
6225 	}
6226     }
6227 
6228   /* Copy the template we found.  */
6229   i.tm = *t;
6230 
6231   if (addr_prefix_disp != -1)
6232     i.tm.operand_types[addr_prefix_disp]
6233       = operand_types[addr_prefix_disp];
6234 
6235   if (found_reverse_match)
6236     {
6237       /* If we found a reverse match we must alter the opcode direction
6238 	 bit and clear/flip the regmem modifier one.  found_reverse_match
6239 	 holds bits to change (different for int & float insns).  */
6240 
6241       i.tm.base_opcode ^= found_reverse_match;
6242 
6243       i.tm.operand_types[0] = operand_types[i.operands - 1];
6244       i.tm.operand_types[i.operands - 1] = operand_types[0];
6245 
6246       /* Certain SIMD insns have their load forms specified in the opcode
6247 	 table, and hence we need to _set_ RegMem instead of clearing it.
6248 	 We need to avoid setting the bit though on insns like KMOVW.  */
6249       i.tm.opcode_modifier.regmem
6250 	= i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
6251 	  && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
6252 	  && !i.tm.opcode_modifier.regmem;
6253     }
6254 
6255   return t;
6256 }
6257 
6258 static int
6259 check_string (void)
6260 {
6261   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
6262   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
6263 
6264   if (i.seg[op] != NULL && i.seg[op] != &es)
6265     {
6266       as_bad (_("`%s' operand %u must use `%ses' segment"),
6267 	      i.tm.name,
6268 	      intel_syntax ? i.tm.operands - es_op : es_op + 1,
6269 	      register_prefix);
6270       return 0;
6271     }
6272 
6273   /* There's only ever one segment override allowed per instruction.
6274      This instruction possibly has a legal segment override on the
6275      second operand, so copy the segment to where non-string
6276      instructions store it, allowing common code.  */
6277   i.seg[op] = i.seg[1];
6278 
6279   return 1;
6280 }
6281 
/* Settle the operand size suffix (i.suffix) for the matched template
   i.tm and apply its consequences to the encoding.

   The suffix is chosen, in order of precedence, from:
     - a size forced by the template (SIZE16/SIZE32/SIZE64);
     - the register operands, when no suffix was given; a suffix that
       was given is instead validated against the register operands by
       the check_*_reg helpers;
     - stackop_size, for DefaultSize templates;
     - the current code size (flag_code), for some Intel syntax jumps
       and the 0x0f01 group with extension opcode <= 3.
   If no suffix results and the operand size is ambiguous, an error is
   issued.  Afterwards base_opcode is adjusted, an address or data size
   prefix is added where needed, REX_W is set for qword operations, and
   register sizes are checked for AddrPrefixOpReg templates.

   Returns 1 on success and 0 once an error has been reported (either
   here or by one of the helpers called).  */

static int
process_suffix (void)
{
  /* If the matched template forces an operand size, use the
     corresponding mnemonic suffix.  */
  if (i.tm.opcode_modifier.size == SIZE16)
    i.suffix = WORD_MNEM_SUFFIX;
  else if (i.tm.opcode_modifier.size == SIZE32)
    i.suffix = LONG_MNEM_SUFFIX;
  else if (i.tm.opcode_modifier.size == SIZE64)
    i.suffix = QWORD_MNEM_SUFFIX;
  else if (i.reg_operands
	   && (i.operands > 1 || i.types[0].bitfield.class == Reg))
    {
      /* If there's no instruction mnemonic suffix we try to invent one
	 based on GPR operands.  */
      if (!i.suffix)
	{
	  /* We take i.suffix from the last register operand specified:
	     the destination register type is more significant than the
	     source register type.  crc32 in SSE4.2 prefers the source
	     register type.  */
	  if (i.tm.base_opcode == 0xf20f38f0
	      && i.types[0].bitfield.class == Reg)
	    {
	      if (i.types[0].bitfield.byte)
		i.suffix = BYTE_MNEM_SUFFIX;
	      else if (i.types[0].bitfield.word)
		i.suffix = WORD_MNEM_SUFFIX;
	      else if (i.types[0].bitfield.dword)
		i.suffix = LONG_MNEM_SUFFIX;
	      else if (i.types[0].bitfield.qword)
		i.suffix = QWORD_MNEM_SUFFIX;
	    }

	  if (!i.suffix)
	    {
	      int op;

	      if (i.tm.base_opcode == 0xf20f38f0)
		{
		  /* We have to know the operand size for crc32.  */
		  as_bad (_("ambiguous memory operand size for `%s`"),
			  i.tm.name);
		  return 0;
		}

	      /* Scan from the last operand towards the first and stop at
		 the first general register with a recognized size.  Only
		 template operands whose instance is InstanceNone or Accum
		 are considered.  */
	      for (op = i.operands; --op >= 0;)
		if (i.tm.operand_types[op].bitfield.instance == InstanceNone
		    || i.tm.operand_types[op].bitfield.instance == Accum)
		  {
		    if (i.types[op].bitfield.class != Reg)
		      continue;
		    if (i.types[op].bitfield.byte)
		      i.suffix = BYTE_MNEM_SUFFIX;
		    else if (i.types[op].bitfield.word)
		      i.suffix = WORD_MNEM_SUFFIX;
		    else if (i.types[op].bitfield.dword)
		      i.suffix = LONG_MNEM_SUFFIX;
		    else if (i.types[op].bitfield.qword)
		      i.suffix = QWORD_MNEM_SUFFIX;
		    else
		      continue;
		    break;
		  }
	    }
	}
      /* A suffix was given.  In Intel syntax it is dropped when the
	 template ignores the operand size and does not accept that
	 suffix; otherwise the register operands have to agree with it.  */
      else if (i.suffix == BYTE_MNEM_SUFFIX)
	{
	  if (intel_syntax
	      && i.tm.opcode_modifier.ignoresize
	      && i.tm.opcode_modifier.no_bsuf)
	    i.suffix = 0;
	  else if (!check_byte_reg ())
	    return 0;
	}
      else if (i.suffix == LONG_MNEM_SUFFIX)
	{
	  if (intel_syntax
	      && i.tm.opcode_modifier.ignoresize
	      && i.tm.opcode_modifier.no_lsuf
	      && !i.tm.opcode_modifier.todword
	      && !i.tm.opcode_modifier.toqword)
	    i.suffix = 0;
	  else if (!check_long_reg ())
	    return 0;
	}
      else if (i.suffix == QWORD_MNEM_SUFFIX)
	{
	  if (intel_syntax
	      && i.tm.opcode_modifier.ignoresize
	      && i.tm.opcode_modifier.no_qsuf
	      && !i.tm.opcode_modifier.todword
	      && !i.tm.opcode_modifier.toqword)
	    i.suffix = 0;
	  else if (!check_qword_reg ())
	    return 0;
	}
      else if (i.suffix == WORD_MNEM_SUFFIX)
	{
	  if (intel_syntax
	      && i.tm.opcode_modifier.ignoresize
	      && i.tm.opcode_modifier.no_wsuf)
	    i.suffix = 0;
	  else if (!check_word_reg ())
	    return 0;
	}
      else if (intel_syntax && i.tm.opcode_modifier.ignoresize)
	/* Do nothing if the instruction is going to ignore the prefix.  */
	;
      else
	abort ();
    }
  else if (i.tm.opcode_modifier.defaultsize
	   && !i.suffix
	   /* exclude fldenv/frstor/fsave/fstenv */
	   && i.tm.opcode_modifier.no_ssuf
	   /* exclude sysret */
	   && i.tm.base_opcode != 0x0f07)
    {
      i.suffix = stackop_size;
      if (stackop_size == LONG_MNEM_SUFFIX)
	{
	  /* stackop_size is set to LONG_MNEM_SUFFIX for the
	     .code16gcc directive to support 16-bit mode with
	     32-bit address.  For IRET without a suffix, generate
	     16-bit IRET (opcode 0xcf) to return from an interrupt
	     handler.  */
	  if (i.tm.base_opcode == 0xcf)
	    {
	      i.suffix = WORD_MNEM_SUFFIX;
	      as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
	    }
	  /* Warn about changed behavior for segment register push/pop.  */
	  else if ((i.tm.base_opcode | 1) == 0x07)
	    as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
		     i.tm.name);
	}
    }
  else if (intel_syntax
	   && !i.suffix
	   && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
	       || i.tm.opcode_modifier.jump == JUMP_BYTE
	       || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
	       || (i.tm.base_opcode == 0x0f01 /* [ls][gi]dt */
		   && i.tm.extension_opcode <= 3)))
    {
      /* Default the suffix from the current code size, provided the
	 template accepts that suffix.  */
      switch (flag_code)
	{
	case CODE_64BIT:
	  if (!i.tm.opcode_modifier.no_qsuf)
	    {
	      i.suffix = QWORD_MNEM_SUFFIX;
	      break;
	    }
	  /* Fall through.  */
	case CODE_32BIT:
	  if (!i.tm.opcode_modifier.no_lsuf)
	    i.suffix = LONG_MNEM_SUFFIX;
	  break;
	case CODE_16BIT:
	  if (!i.tm.opcode_modifier.no_wsuf)
	    i.suffix = WORD_MNEM_SUFFIX;
	  break;
	}
    }

  if (!i.suffix)
    {
      if (!intel_syntax)
	{
	  if (i.tm.opcode_modifier.w)
	    {
	      as_bad (_("no instruction mnemonic suffix given and "
			"no register operands; can't size instruction"));
	      return 0;
	    }
	}
      else
	{
	  /* Bit mask of the suffixes the template accepts: one bit each
	     for the b, w, l, ld, s and (64-bit mode only) q suffix.  */
	  unsigned int suffixes;

	  suffixes = !i.tm.opcode_modifier.no_bsuf;
	  if (!i.tm.opcode_modifier.no_wsuf)
	    suffixes |= 1 << 1;
	  if (!i.tm.opcode_modifier.no_lsuf)
	    suffixes |= 1 << 2;
	  if (!i.tm.opcode_modifier.no_ldsuf)
	    suffixes |= 1 << 3;
	  if (!i.tm.opcode_modifier.no_ssuf)
	    suffixes |= 1 << 4;
	  if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
	    suffixes |= 1 << 5;

	  /* More than one suffix matches, i.e. the operand size is
	     ambiguous.  (suffixes & (suffixes - 1)) is nonzero exactly
	     when more than one bit is set.  */
	  if (i.tm.opcode_modifier.w
	      || ((suffixes & (suffixes - 1))
		  && !i.tm.opcode_modifier.defaultsize
		  && !i.tm.opcode_modifier.ignoresize))
	    {
	      as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
	      return 0;
	    }
	}
    }

  /* Change the opcode based on the operand size given by i.suffix.  */
  switch (i.suffix)
    {
    /* Size floating point instruction.  */
    case LONG_MNEM_SUFFIX:
      if (i.tm.opcode_modifier.floatmf)
	{
	  i.tm.base_opcode ^= 4;
	  break;
	}
    /* Fall through.  */
    case WORD_MNEM_SUFFIX:
    case QWORD_MNEM_SUFFIX:
      /* It's not a byte, select word/dword operation.  */
      if (i.tm.opcode_modifier.w)
	{
	  if (i.tm.opcode_modifier.shortform)
	    i.tm.base_opcode |= 8;
	  else
	    i.tm.base_opcode |= 1;
	}
    /* Fall through.  */
    case SHORT_MNEM_SUFFIX:
      /* Now select between word & dword operations via the operand
	 size prefix, except for instructions that will ignore this
	 prefix anyway.  */
      if (i.reg_operands > 0
	  && i.types[0].bitfield.class == Reg
	  && i.tm.opcode_modifier.addrprefixopreg
	  && (i.tm.operand_types[0].bitfield.instance == Accum
	      || i.operands == 1))
	{
	  /* The address size override prefix changes the size of the
	     first operand.  */
	  if ((flag_code == CODE_32BIT
	       && i.op[0].regs->reg_type.bitfield.word)
	      || (flag_code != CODE_32BIT
		  && i.op[0].regs->reg_type.bitfield.dword))
	    if (!add_prefix (ADDR_PREFIX_OPCODE))
	      return 0;
	}
      /* Otherwise a prefix is needed when the suffix is long in 16-bit
	 code or is not long in 32/64-bit code, and also for JUMP_BYTE
	 templates in 64-bit code.  */
      else if (i.suffix != QWORD_MNEM_SUFFIX
	       && !i.tm.opcode_modifier.ignoresize
	       && !i.tm.opcode_modifier.floatmf
	       && !is_any_vex_encoding (&i.tm)
	       && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
		   || (flag_code == CODE_64BIT
		       && i.tm.opcode_modifier.jump == JUMP_BYTE)))
	{
	  unsigned int prefix = DATA_PREFIX_OPCODE;

	  if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
	    prefix = ADDR_PREFIX_OPCODE;

	  if (!add_prefix (prefix))
	    return 0;
	}

      /* Set REX_W for a qword suffix in 64-bit mode, unless the
	 template has NoRex64.  */
      if (i.suffix == QWORD_MNEM_SUFFIX
	  && flag_code == CODE_64BIT
	  && !i.tm.opcode_modifier.norex64
	  /* Special case for xchg %rax,%rax.  It is NOP and doesn't
	     need rex64. */
	  && ! (i.operands == 2
		&& i.tm.base_opcode == 0x90
		&& i.tm.extension_opcode == None
		&& i.types[0].bitfield.instance == Accum
		&& i.types[0].bitfield.qword
		&& i.types[1].bitfield.instance == Accum
		&& i.types[1].bitfield.qword))
	i.rex |= REX_W;

      break;
    }

  if (i.reg_operands != 0
      && i.operands > 1
      && i.tm.opcode_modifier.addrprefixopreg
      && i.tm.operand_types[0].bitfield.instance != Accum)
    {
      /* Check invalid register operand when the address size override
	 prefix changes the size of register operands.  */
      unsigned int op;
      enum { need_word, need_dword, need_qword } need;

      /* The required register size follows from the code size, toggled
	 by an address size prefix if one is present.  */
      if (flag_code == CODE_32BIT)
	need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
      else
	{
	  if (i.prefix[ADDR_PREFIX])
	    need = need_dword;
	  else
	    need = flag_code == CODE_64BIT ? need_qword : need_word;
	}

      for (op = 0; op < i.operands; op++)
	if (i.types[op].bitfield.class == Reg
	    && ((need == need_word
		 && !i.op[op].regs->reg_type.bitfield.word)
		|| (need == need_dword
		    && !i.op[op].regs->reg_type.bitfield.dword)
		|| (need == need_qword
		    && !i.op[op].regs->reg_type.bitfield.qword)))
	  {
	    as_bad (_("invalid register operand size for `%s'"),
		    i.tm.name);
	    return 0;
	  }
    }

  return 1;
}
6601 
6602 static int
6603 check_byte_reg (void)
6604 {
6605   int op;
6606 
6607   for (op = i.operands; --op >= 0;)
6608     {
6609       /* Skip non-register operands. */
6610       if (i.types[op].bitfield.class != Reg)
6611 	continue;
6612 
6613       /* If this is an eight bit register, it's OK.  If it's the 16 or
6614 	 32 bit version of an eight bit register, we will just use the
6615 	 low portion, and that's OK too.  */
6616       if (i.types[op].bitfield.byte)
6617 	continue;
6618 
6619       /* I/O port address operands are OK too.  */
6620       if (i.tm.operand_types[op].bitfield.instance == RegD
6621 	  && i.tm.operand_types[op].bitfield.word)
6622 	continue;
6623 
6624       /* crc32 doesn't generate this warning.  */
6625       if (i.tm.base_opcode == 0xf20f38f0)
6626 	continue;
6627 
6628       if ((i.types[op].bitfield.word
6629 	   || i.types[op].bitfield.dword
6630 	   || i.types[op].bitfield.qword)
6631 	  && i.op[op].regs->reg_num < 4
6632 	  /* Prohibit these changes in 64bit mode, since the lowering
6633 	     would be more complicated.  */
6634 	  && flag_code != CODE_64BIT)
6635 	{
6636 #if REGISTER_WARNINGS
6637 	  if (!quiet_warnings)
6638 	    as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
6639 		     register_prefix,
6640 		     (i.op[op].regs + (i.types[op].bitfield.word
6641 				       ? REGNAM_AL - REGNAM_AX
6642 				       : REGNAM_AL - REGNAM_EAX))->reg_name,
6643 		     register_prefix,
6644 		     i.op[op].regs->reg_name,
6645 		     i.suffix);
6646 #endif
6647 	  continue;
6648 	}
6649       /* Any other register is bad.  */
6650       if (i.types[op].bitfield.class == Reg
6651 	  || i.types[op].bitfield.class == RegMMX
6652 	  || i.types[op].bitfield.class == RegSIMD
6653 	  || i.types[op].bitfield.class == SReg
6654 	  || i.types[op].bitfield.class == RegCR
6655 	  || i.types[op].bitfield.class == RegDR
6656 	  || i.types[op].bitfield.class == RegTR)
6657 	{
6658 	  as_bad (_("`%s%s' not allowed with `%s%c'"),
6659 		  register_prefix,
6660 		  i.op[op].regs->reg_name,
6661 		  i.tm.name,
6662 		  i.suffix);
6663 	  return 0;
6664 	}
6665     }
6666   return 1;
6667 }
6668 
6669 static int
6670 check_long_reg (void)
6671 {
6672   int op;
6673 
6674   for (op = i.operands; --op >= 0;)
6675     /* Skip non-register operands. */
6676     if (i.types[op].bitfield.class != Reg)
6677       continue;
6678     /* Reject eight bit registers, except where the template requires
6679        them. (eg. movzb)  */
6680     else if (i.types[op].bitfield.byte
6681 	     && (i.tm.operand_types[op].bitfield.class == Reg
6682 		 || i.tm.operand_types[op].bitfield.instance == Accum)
6683 	     && (i.tm.operand_types[op].bitfield.word
6684 		 || i.tm.operand_types[op].bitfield.dword))
6685       {
6686 	as_bad (_("`%s%s' not allowed with `%s%c'"),
6687 		register_prefix,
6688 		i.op[op].regs->reg_name,
6689 		i.tm.name,
6690 		i.suffix);
6691 	return 0;
6692       }
6693     /* Warn if the e prefix on a general reg is missing.  */
6694     else if ((!quiet_warnings || flag_code == CODE_64BIT)
6695 	     && i.types[op].bitfield.word
6696 	     && (i.tm.operand_types[op].bitfield.class == Reg
6697 		 || i.tm.operand_types[op].bitfield.instance == Accum)
6698 	     && i.tm.operand_types[op].bitfield.dword)
6699       {
6700 	/* Prohibit these changes in the 64bit mode, since the
6701 	   lowering is more complicated.  */
6702 	if (flag_code == CODE_64BIT)
6703 	  {
6704 	    as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6705 		    register_prefix, i.op[op].regs->reg_name,
6706 		    i.suffix);
6707 	    return 0;
6708 	  }
6709 #if REGISTER_WARNINGS
6710 	as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
6711 		 register_prefix,
6712 		 (i.op[op].regs + REGNAM_EAX - REGNAM_AX)->reg_name,
6713 		 register_prefix, i.op[op].regs->reg_name, i.suffix);
6714 #endif
6715       }
6716     /* Warn if the r prefix on a general reg is present.  */
6717     else if (i.types[op].bitfield.qword
6718 	     && (i.tm.operand_types[op].bitfield.class == Reg
6719 		 || i.tm.operand_types[op].bitfield.instance == Accum)
6720 	     && i.tm.operand_types[op].bitfield.dword)
6721       {
6722 	if (intel_syntax
6723 	    && i.tm.opcode_modifier.toqword
6724 	    && i.types[0].bitfield.class != RegSIMD)
6725 	  {
6726 	    /* Convert to QWORD.  We want REX byte. */
6727 	    i.suffix = QWORD_MNEM_SUFFIX;
6728 	  }
6729 	else
6730 	  {
6731 	    as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6732 		    register_prefix, i.op[op].regs->reg_name,
6733 		    i.suffix);
6734 	    return 0;
6735 	  }
6736       }
6737   return 1;
6738 }
6739 
6740 static int
6741 check_qword_reg (void)
6742 {
6743   int op;
6744 
6745   for (op = i.operands; --op >= 0; )
6746     /* Skip non-register operands. */
6747     if (i.types[op].bitfield.class != Reg)
6748       continue;
6749     /* Reject eight bit registers, except where the template requires
6750        them. (eg. movzb)  */
6751     else if (i.types[op].bitfield.byte
6752 	     && (i.tm.operand_types[op].bitfield.class == Reg
6753 		 || i.tm.operand_types[op].bitfield.instance == Accum)
6754 	     && (i.tm.operand_types[op].bitfield.word
6755 		 || i.tm.operand_types[op].bitfield.dword))
6756       {
6757 	as_bad (_("`%s%s' not allowed with `%s%c'"),
6758 		register_prefix,
6759 		i.op[op].regs->reg_name,
6760 		i.tm.name,
6761 		i.suffix);
6762 	return 0;
6763       }
6764     /* Warn if the r prefix on a general reg is missing.  */
6765     else if ((i.types[op].bitfield.word
6766 	      || i.types[op].bitfield.dword)
6767 	     && (i.tm.operand_types[op].bitfield.class == Reg
6768 		 || i.tm.operand_types[op].bitfield.instance == Accum)
6769 	     && i.tm.operand_types[op].bitfield.qword)
6770       {
6771 	/* Prohibit these changes in the 64bit mode, since the
6772 	   lowering is more complicated.  */
6773 	if (intel_syntax
6774 	    && i.tm.opcode_modifier.todword
6775 	    && i.types[0].bitfield.class != RegSIMD)
6776 	  {
6777 	    /* Convert to DWORD.  We don't want REX byte. */
6778 	    i.suffix = LONG_MNEM_SUFFIX;
6779 	  }
6780 	else
6781 	  {
6782 	    as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6783 		    register_prefix, i.op[op].regs->reg_name,
6784 		    i.suffix);
6785 	    return 0;
6786 	  }
6787       }
6788   return 1;
6789 }
6790 
6791 static int
6792 check_word_reg (void)
6793 {
6794   int op;
6795   for (op = i.operands; --op >= 0;)
6796     /* Skip non-register operands. */
6797     if (i.types[op].bitfield.class != Reg)
6798       continue;
6799     /* Reject eight bit registers, except where the template requires
6800        them. (eg. movzb)  */
6801     else if (i.types[op].bitfield.byte
6802 	     && (i.tm.operand_types[op].bitfield.class == Reg
6803 		 || i.tm.operand_types[op].bitfield.instance == Accum)
6804 	     && (i.tm.operand_types[op].bitfield.word
6805 		 || i.tm.operand_types[op].bitfield.dword))
6806       {
6807 	as_bad (_("`%s%s' not allowed with `%s%c'"),
6808 		register_prefix,
6809 		i.op[op].regs->reg_name,
6810 		i.tm.name,
6811 		i.suffix);
6812 	return 0;
6813       }
6814     /* Warn if the e or r prefix on a general reg is present.  */
6815     else if ((!quiet_warnings || flag_code == CODE_64BIT)
6816 	     && (i.types[op].bitfield.dword
6817 		 || i.types[op].bitfield.qword)
6818 	     && (i.tm.operand_types[op].bitfield.class == Reg
6819 		 || i.tm.operand_types[op].bitfield.instance == Accum)
6820 	     && i.tm.operand_types[op].bitfield.word)
6821       {
6822 	/* Prohibit these changes in the 64bit mode, since the
6823 	   lowering is more complicated.  */
6824 	if (flag_code == CODE_64BIT)
6825 	  {
6826 	    as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
6827 		    register_prefix, i.op[op].regs->reg_name,
6828 		    i.suffix);
6829 	    return 0;
6830 	  }
6831 #if REGISTER_WARNINGS
6832 	as_warn (_("using `%s%s' instead of `%s%s' due to `%c' suffix"),
6833 		 register_prefix,
6834 		 (i.op[op].regs + REGNAM_AX - REGNAM_EAX)->reg_name,
6835 		 register_prefix, i.op[op].regs->reg_name, i.suffix);
6836 #endif
6837       }
6838   return 1;
6839 }
6840 
6841 static int
6842 update_imm (unsigned int j)
6843 {
6844   i386_operand_type overlap = i.types[j];
6845   if ((overlap.bitfield.imm8
6846        || overlap.bitfield.imm8s
6847        || overlap.bitfield.imm16
6848        || overlap.bitfield.imm32
6849        || overlap.bitfield.imm32s
6850        || overlap.bitfield.imm64)
6851       && !operand_type_equal (&overlap, &imm8)
6852       && !operand_type_equal (&overlap, &imm8s)
6853       && !operand_type_equal (&overlap, &imm16)
6854       && !operand_type_equal (&overlap, &imm32)
6855       && !operand_type_equal (&overlap, &imm32s)
6856       && !operand_type_equal (&overlap, &imm64))
6857     {
6858       if (i.suffix)
6859 	{
6860 	  i386_operand_type temp;
6861 
6862 	  operand_type_set (&temp, 0);
6863 	  if (i.suffix == BYTE_MNEM_SUFFIX)
6864 	    {
6865 	      temp.bitfield.imm8 = overlap.bitfield.imm8;
6866 	      temp.bitfield.imm8s = overlap.bitfield.imm8s;
6867 	    }
6868 	  else if (i.suffix == WORD_MNEM_SUFFIX)
6869 	    temp.bitfield.imm16 = overlap.bitfield.imm16;
6870 	  else if (i.suffix == QWORD_MNEM_SUFFIX)
6871 	    {
6872 	      temp.bitfield.imm64 = overlap.bitfield.imm64;
6873 	      temp.bitfield.imm32s = overlap.bitfield.imm32s;
6874 	    }
6875 	  else
6876 	    temp.bitfield.imm32 = overlap.bitfield.imm32;
6877 	  overlap = temp;
6878 	}
6879       else if (operand_type_equal (&overlap, &imm16_32_32s)
6880 	       || operand_type_equal (&overlap, &imm16_32)
6881 	       || operand_type_equal (&overlap, &imm16_32s))
6882 	{
6883 	  if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6884 	    overlap = imm16;
6885 	  else
6886 	    overlap = imm32s;
6887 	}
6888       if (!operand_type_equal (&overlap, &imm8)
6889 	  && !operand_type_equal (&overlap, &imm8s)
6890 	  && !operand_type_equal (&overlap, &imm16)
6891 	  && !operand_type_equal (&overlap, &imm32)
6892 	  && !operand_type_equal (&overlap, &imm32s)
6893 	  && !operand_type_equal (&overlap, &imm64))
6894 	{
6895 	  as_bad (_("no instruction mnemonic suffix given; "
6896 		    "can't determine immediate size"));
6897 	  return 0;
6898 	}
6899     }
6900   i.types[j] = overlap;
6901 
6902   return 1;
6903 }
6904 
6905 static int
6906 finalize_imm (void)
6907 {
6908   unsigned int j, n;
6909 
6910   /* Update the first 2 immediate operands.  */
6911   n = i.operands > 2 ? 2 : i.operands;
6912   if (n)
6913     {
6914       for (j = 0; j < n; j++)
6915 	if (update_imm (j) == 0)
6916 	  return 0;
6917 
6918       /* The 3rd operand can't be immediate operand.  */
6919       gas_assert (operand_type_check (i.types[2], imm) == 0);
6920     }
6921 
6922   return 1;
6923 }
6924 
6925 static int
6926 process_operands (void)
6927 {
6928   /* Default segment register this instruction will use for memory
6929      accesses.  0 means unknown.  This is only for optimizing out
6930      unnecessary segment overrides.  */
6931   const seg_entry *default_seg = 0;
6932 
6933   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
6934     {
6935       unsigned int dupl = i.operands;
6936       unsigned int dest = dupl - 1;
6937       unsigned int j;
6938 
6939       /* The destination must be an xmm register.  */
6940       gas_assert (i.reg_operands
6941 		  && MAX_OPERANDS > dupl
6942 		  && operand_type_equal (&i.types[dest], &regxmm));
6943 
6944       if (i.tm.operand_types[0].bitfield.instance == Accum
6945 	  && i.tm.operand_types[0].bitfield.xmmword)
6946 	{
6947 	  if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
6948 	    {
6949 	      /* Keep xmm0 for instructions with VEX prefix and 3
6950 		 sources.  */
6951 	      i.tm.operand_types[0].bitfield.instance = InstanceNone;
6952 	      i.tm.operand_types[0].bitfield.class = RegSIMD;
6953 	      goto duplicate;
6954 	    }
6955 	  else
6956 	    {
6957 	      /* We remove the first xmm0 and keep the number of
6958 		 operands unchanged, which in fact duplicates the
6959 		 destination.  */
6960 	      for (j = 1; j < i.operands; j++)
6961 		{
6962 		  i.op[j - 1] = i.op[j];
6963 		  i.types[j - 1] = i.types[j];
6964 		  i.tm.operand_types[j - 1] = i.tm.operand_types[j];
6965 		  i.flags[j - 1] = i.flags[j];
6966 		}
6967 	    }
6968 	}
6969       else if (i.tm.opcode_modifier.implicit1stxmm0)
6970 	{
6971 	  gas_assert ((MAX_OPERANDS - 1) > dupl
6972 		      && (i.tm.opcode_modifier.vexsources
6973 			  == VEX3SOURCES));
6974 
6975 	  /* Add the implicit xmm0 for instructions with VEX prefix
6976 	     and 3 sources.  */
6977 	  for (j = i.operands; j > 0; j--)
6978 	    {
6979 	      i.op[j] = i.op[j - 1];
6980 	      i.types[j] = i.types[j - 1];
6981 	      i.tm.operand_types[j] = i.tm.operand_types[j - 1];
6982 	      i.flags[j] = i.flags[j - 1];
6983 	    }
6984 	  i.op[0].regs
6985 	    = (const reg_entry *) hash_find (reg_hash, "xmm0");
6986 	  i.types[0] = regxmm;
6987 	  i.tm.operand_types[0] = regxmm;
6988 
6989 	  i.operands += 2;
6990 	  i.reg_operands += 2;
6991 	  i.tm.operands += 2;
6992 
6993 	  dupl++;
6994 	  dest++;
6995 	  i.op[dupl] = i.op[dest];
6996 	  i.types[dupl] = i.types[dest];
6997 	  i.tm.operand_types[dupl] = i.tm.operand_types[dest];
6998 	  i.flags[dupl] = i.flags[dest];
6999 	}
7000       else
7001 	{
7002 duplicate:
7003 	  i.operands++;
7004 	  i.reg_operands++;
7005 	  i.tm.operands++;
7006 
7007 	  i.op[dupl] = i.op[dest];
7008 	  i.types[dupl] = i.types[dest];
7009 	  i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7010 	  i.flags[dupl] = i.flags[dest];
7011 	}
7012 
7013        if (i.tm.opcode_modifier.immext)
7014 	 process_immext ();
7015     }
7016   else if (i.tm.operand_types[0].bitfield.instance == Accum
7017 	   && i.tm.operand_types[0].bitfield.xmmword)
7018     {
7019       unsigned int j;
7020 
7021       for (j = 1; j < i.operands; j++)
7022 	{
7023 	  i.op[j - 1] = i.op[j];
7024 	  i.types[j - 1] = i.types[j];
7025 
7026 	  /* We need to adjust fields in i.tm since they are used by
7027 	     build_modrm_byte.  */
7028 	  i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7029 
7030 	  i.flags[j - 1] = i.flags[j];
7031 	}
7032 
7033       i.operands--;
7034       i.reg_operands--;
7035       i.tm.operands--;
7036     }
7037   else if (i.tm.opcode_modifier.implicitquadgroup)
7038     {
7039       unsigned int regnum, first_reg_in_group, last_reg_in_group;
7040 
7041       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7042       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7043       regnum = register_number (i.op[1].regs);
7044       first_reg_in_group = regnum & ~3;
7045       last_reg_in_group = first_reg_in_group + 3;
7046       if (regnum != first_reg_in_group)
7047 	as_warn (_("source register `%s%s' implicitly denotes"
7048 		   " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7049 		 register_prefix, i.op[1].regs->reg_name,
7050 		 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7051 		 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7052 		 i.tm.name);
7053     }
7054   else if (i.tm.opcode_modifier.regkludge)
7055     {
7056       /* The imul $imm, %reg instruction is converted into
7057 	 imul $imm, %reg, %reg, and the clr %reg instruction
7058 	 is converted into xor %reg, %reg.  */
7059 
7060       unsigned int first_reg_op;
7061 
7062       if (operand_type_check (i.types[0], reg))
7063 	first_reg_op = 0;
7064       else
7065 	first_reg_op = 1;
7066       /* Pretend we saw the extra register operand.  */
7067       gas_assert (i.reg_operands == 1
7068 		  && i.op[first_reg_op + 1].regs == 0);
7069       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7070       i.types[first_reg_op + 1] = i.types[first_reg_op];
7071       i.operands++;
7072       i.reg_operands++;
7073     }
7074 
7075   if (i.tm.opcode_modifier.modrm)
7076     {
7077       /* The opcode is completed (modulo i.tm.extension_opcode which
7078 	 must be put into the modrm byte).  Now, we make the modrm and
7079 	 index base bytes based on all the info we've collected.  */
7080 
7081       default_seg = build_modrm_byte ();
7082     }
7083   else if (i.types[0].bitfield.class == SReg)
7084     {
7085       if (flag_code != CODE_64BIT
7086 	  ? i.tm.base_opcode == POP_SEG_SHORT
7087 	    && i.op[0].regs->reg_num == 1
7088 	  : (i.tm.base_opcode | 1) == POP_SEG386_SHORT
7089 	    && i.op[0].regs->reg_num < 4)
7090 	{
7091 	  as_bad (_("you can't `%s %s%s'"),
7092 		  i.tm.name, register_prefix, i.op[0].regs->reg_name);
7093 	  return 0;
7094 	}
7095       if ( i.op[0].regs->reg_num > 3 && i.tm.opcode_length == 1 )
7096 	{
7097 	  i.tm.base_opcode ^= POP_SEG_SHORT ^ POP_SEG386_SHORT;
7098 	  i.tm.opcode_length = 2;
7099 	}
7100       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7101     }
7102   else if ((i.tm.base_opcode & ~0x3) == MOV_AX_DISP32)
7103     {
7104       default_seg = &ds;
7105     }
7106   else if (i.tm.opcode_modifier.isstring)
7107     {
7108       /* For the string instructions that allow a segment override
7109 	 on one of their operands, the default segment is ds.  */
7110       default_seg = &ds;
7111     }
7112   else if (i.tm.opcode_modifier.shortform)
7113     {
7114       /* The register or float register operand is in operand
7115 	 0 or 1.  */
7116       unsigned int op = i.tm.operand_types[0].bitfield.class != Reg;
7117 
7118       /* Register goes in low 3 bits of opcode.  */
7119       i.tm.base_opcode |= i.op[op].regs->reg_num;
7120       if ((i.op[op].regs->reg_flags & RegRex) != 0)
7121 	i.rex |= REX_B;
7122       if (!quiet_warnings && i.tm.opcode_modifier.ugh)
7123 	{
7124 	  /* Warn about some common errors, but press on regardless.
7125 	     The first case can be generated by gcc (<= 2.8.1).  */
7126 	  if (i.operands == 2)
7127 	    {
7128 	      /* Reversed arguments on faddp, fsubp, etc.  */
7129 	      as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
7130 		       register_prefix, i.op[!intel_syntax].regs->reg_name,
7131 		       register_prefix, i.op[intel_syntax].regs->reg_name);
7132 	    }
7133 	  else
7134 	    {
7135 	      /* Extraneous `l' suffix on fp insn.  */
7136 	      as_warn (_("translating to `%s %s%s'"), i.tm.name,
7137 		       register_prefix, i.op[0].regs->reg_name);
7138 	    }
7139 	}
7140     }
7141 
7142   if (i.tm.base_opcode == 0x8d /* lea */
7143       && i.seg[0]
7144       && !quiet_warnings)
7145     as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
7146 
7147   /* If a segment was explicitly specified, and the specified segment
7148      is not the default, use an opcode prefix to select it.  If we
7149      never figured out what the default segment is, then default_seg
7150      will be zero at this point, and the specified segment prefix will
7151      always be used.  */
7152   if ((i.seg[0]) && (i.seg[0] != default_seg))
7153     {
7154       if (!add_prefix (i.seg[0]->seg_prefix))
7155 	return 0;
7156     }
7157   return 1;
7158 }
7159 
7160 static const seg_entry *
7161 build_modrm_byte (void)
7162 {
7163   const seg_entry *default_seg = 0;
7164   unsigned int source, dest;
7165   int vex_3_sources;
7166 
7167   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
7168   if (vex_3_sources)
7169     {
7170       unsigned int nds, reg_slot;
7171       expressionS *exp;
7172 
7173       dest = i.operands - 1;
7174       nds = dest - 1;
7175 
7176       /* There are 2 kinds of instructions:
7177 	 1. 5 operands: 4 register operands or 3 register operands
7178 	 plus 1 memory operand plus one Imm4 operand, VexXDS, and
7179 	 VexW0 or VexW1.  The destination must be either XMM, YMM or
7180 	 ZMM register.
7181 	 2. 4 operands: 4 register operands or 3 register operands
7182 	 plus 1 memory operand, with VexXDS.  */
7183       gas_assert ((i.reg_operands == 4
7184 		   || (i.reg_operands == 3 && i.mem_operands == 1))
7185 		  && i.tm.opcode_modifier.vexvvvv == VEXXDS
7186 		  && i.tm.opcode_modifier.vexw
7187 		  && i.tm.operand_types[dest].bitfield.class == RegSIMD);
7188 
7189       /* If VexW1 is set, the first non-immediate operand is the source and
7190 	 the second non-immediate one is encoded in the immediate operand.  */
7191       if (i.tm.opcode_modifier.vexw == VEXW1)
7192 	{
7193 	  source = i.imm_operands;
7194 	  reg_slot = i.imm_operands + 1;
7195 	}
7196       else
7197 	{
7198 	  source = i.imm_operands + 1;
7199 	  reg_slot = i.imm_operands;
7200 	}
7201 
7202       if (i.imm_operands == 0)
7203 	{
7204 	  /* When there is no immediate operand, generate an 8bit
7205 	     immediate operand to encode the first operand.  */
7206 	  exp = &im_expressions[i.imm_operands++];
7207 	  i.op[i.operands].imms = exp;
7208 	  i.types[i.operands] = imm8;
7209 	  i.operands++;
7210 
7211 	  gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7212 	  exp->X_op = O_constant;
7213 	  exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
7214 	  gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7215 	}
7216       else
7217 	{
7218 	  gas_assert (i.imm_operands == 1);
7219 	  gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
7220 	  gas_assert (!i.tm.opcode_modifier.immext);
7221 
7222 	  /* Turn on Imm8 again so that output_imm will generate it.  */
7223 	  i.types[0].bitfield.imm8 = 1;
7224 
7225 	  gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
7226 	  i.op[0].imms->X_add_number
7227 	      |= register_number (i.op[reg_slot].regs) << 4;
7228 	  gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
7229 	}
7230 
7231       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
7232       i.vex.register_specifier = i.op[nds].regs;
7233     }
7234   else
7235     source = dest = 0;
7236 
7237   /* i.reg_operands MUST be the number of real register operands;
7238      implicit registers do not count.  If there are 3 register
7239      operands, it must be a instruction with VexNDS.  For a
7240      instruction with VexNDD, the destination register is encoded
7241      in VEX prefix.  If there are 4 register operands, it must be
7242      a instruction with VEX prefix and 3 sources.  */
7243   if (i.mem_operands == 0
7244       && ((i.reg_operands == 2
7245 	   && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
7246 	  || (i.reg_operands == 3
7247 	      && i.tm.opcode_modifier.vexvvvv == VEXXDS)
7248 	  || (i.reg_operands == 4 && vex_3_sources)))
7249     {
7250       switch (i.operands)
7251 	{
7252 	case 2:
7253 	  source = 0;
7254 	  break;
7255 	case 3:
7256 	  /* When there are 3 operands, one of them may be immediate,
7257 	     which may be the first or the last operand.  Otherwise,
7258 	     the first operand must be shift count register (cl) or it
7259 	     is an instruction with VexNDS. */
7260 	  gas_assert (i.imm_operands == 1
7261 		      || (i.imm_operands == 0
7262 			  && (i.tm.opcode_modifier.vexvvvv == VEXXDS
7263 			      || (i.types[0].bitfield.instance == RegC
7264 				  && i.types[0].bitfield.byte))));
7265 	  if (operand_type_check (i.types[0], imm)
7266 	      || (i.types[0].bitfield.instance == RegC
7267 		  && i.types[0].bitfield.byte))
7268 	    source = 1;
7269 	  else
7270 	    source = 0;
7271 	  break;
7272 	case 4:
7273 	  /* When there are 4 operands, the first two must be 8bit
7274 	     immediate operands. The source operand will be the 3rd
7275 	     one.
7276 
7277 	     For instructions with VexNDS, if the first operand
7278 	     an imm8, the source operand is the 2nd one.  If the last
7279 	     operand is imm8, the source operand is the first one.  */
7280 	  gas_assert ((i.imm_operands == 2
7281 		       && i.types[0].bitfield.imm8
7282 		       && i.types[1].bitfield.imm8)
7283 		      || (i.tm.opcode_modifier.vexvvvv == VEXXDS
7284 			  && i.imm_operands == 1
7285 			  && (i.types[0].bitfield.imm8
7286 			      || i.types[i.operands - 1].bitfield.imm8
7287 			      || i.rounding)));
7288 	  if (i.imm_operands == 2)
7289 	    source = 2;
7290 	  else
7291 	    {
7292 	      if (i.types[0].bitfield.imm8)
7293 		source = 1;
7294 	      else
7295 		source = 0;
7296 	    }
7297 	  break;
7298 	case 5:
7299 	  if (is_evex_encoding (&i.tm))
7300 	    {
7301 	      /* For EVEX instructions, when there are 5 operands, the
7302 		 first one must be immediate operand.  If the second one
7303 		 is immediate operand, the source operand is the 3th
7304 		 one.  If the last one is immediate operand, the source
7305 		 operand is the 2nd one.  */
7306 	      gas_assert (i.imm_operands == 2
7307 			  && i.tm.opcode_modifier.sae
7308 			  && operand_type_check (i.types[0], imm));
7309 	      if (operand_type_check (i.types[1], imm))
7310 		source = 2;
7311 	      else if (operand_type_check (i.types[4], imm))
7312 		source = 1;
7313 	      else
7314 		abort ();
7315 	    }
7316 	  break;
7317 	default:
7318 	  abort ();
7319 	}
7320 
7321       if (!vex_3_sources)
7322 	{
7323 	  dest = source + 1;
7324 
7325 	  /* RC/SAE operand could be between DEST and SRC.  That happens
7326 	     when one operand is GPR and the other one is XMM/YMM/ZMM
7327 	     register.  */
7328 	  if (i.rounding && i.rounding->operand == (int) dest)
7329 	    dest++;
7330 
7331 	  if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
7332 	    {
7333 	      /* For instructions with VexNDS, the register-only source
7334 		 operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
7335 		 register.  It is encoded in VEX prefix.  */
7336 
7337 	      i386_operand_type op;
7338 	      unsigned int vvvv;
7339 
7340 	      /* Check register-only source operand when two source
7341 		 operands are swapped.  */
7342 	      if (!i.tm.operand_types[source].bitfield.baseindex
7343 		  && i.tm.operand_types[dest].bitfield.baseindex)
7344 		{
7345 		  vvvv = source;
7346 		  source = dest;
7347 		}
7348 	      else
7349 		vvvv = dest;
7350 
7351 	      op = i.tm.operand_types[vvvv];
7352 	      if ((dest + 1) >= i.operands
7353 		  || ((op.bitfield.class != Reg
7354 		       || (!op.bitfield.dword && !op.bitfield.qword))
7355 		      && op.bitfield.class != RegSIMD
7356 		      && !operand_type_equal (&op, &regmask)))
7357 		abort ();
7358 	      i.vex.register_specifier = i.op[vvvv].regs;
7359 	      dest++;
7360 	    }
7361 	}
7362 
7363       i.rm.mode = 3;
7364       /* One of the register operands will be encoded in the i.rm.reg
7365 	 field, the other in the combined i.rm.mode and i.rm.regmem
7366 	 fields.  If no form of this instruction supports a memory
7367 	 destination operand, then we assume the source operand may
7368 	 sometimes be a memory operand and so we need to store the
7369 	 destination in the i.rm.reg field.  */
7370       if (!i.tm.opcode_modifier.regmem
7371 	  && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
7372 	{
7373 	  i.rm.reg = i.op[dest].regs->reg_num;
7374 	  i.rm.regmem = i.op[source].regs->reg_num;
7375 	  if (i.op[dest].regs->reg_type.bitfield.class == RegMMX
7376 	       || i.op[source].regs->reg_type.bitfield.class == RegMMX)
7377 	    i.has_regmmx = TRUE;
7378 	  else if (i.op[dest].regs->reg_type.bitfield.class == RegSIMD
7379 		   || i.op[source].regs->reg_type.bitfield.class == RegSIMD)
7380 	    {
7381 	      if (i.types[dest].bitfield.zmmword
7382 		  || i.types[source].bitfield.zmmword)
7383 		i.has_regzmm = TRUE;
7384 	      else if (i.types[dest].bitfield.ymmword
7385 		       || i.types[source].bitfield.ymmword)
7386 		i.has_regymm = TRUE;
7387 	      else
7388 		i.has_regxmm = TRUE;
7389 	    }
7390 	  if ((i.op[dest].regs->reg_flags & RegRex) != 0)
7391 	    i.rex |= REX_R;
7392 	  if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
7393 	    i.vrex |= REX_R;
7394 	  if ((i.op[source].regs->reg_flags & RegRex) != 0)
7395 	    i.rex |= REX_B;
7396 	  if ((i.op[source].regs->reg_flags & RegVRex) != 0)
7397 	    i.vrex |= REX_B;
7398 	}
7399       else
7400 	{
7401 	  i.rm.reg = i.op[source].regs->reg_num;
7402 	  i.rm.regmem = i.op[dest].regs->reg_num;
7403 	  if ((i.op[dest].regs->reg_flags & RegRex) != 0)
7404 	    i.rex |= REX_B;
7405 	  if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
7406 	    i.vrex |= REX_B;
7407 	  if ((i.op[source].regs->reg_flags & RegRex) != 0)
7408 	    i.rex |= REX_R;
7409 	  if ((i.op[source].regs->reg_flags & RegVRex) != 0)
7410 	    i.vrex |= REX_R;
7411 	}
7412       if (flag_code != CODE_64BIT && (i.rex & REX_R))
7413 	{
7414 	  if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
7415 	    abort ();
7416 	  i.rex &= ~REX_R;
7417 	  add_prefix (LOCK_PREFIX_OPCODE);
7418 	}
7419     }
7420   else
7421     {			/* If it's not 2 reg operands...  */
7422       unsigned int mem;
7423 
7424       if (i.mem_operands)
7425 	{
7426 	  unsigned int fake_zero_displacement = 0;
7427 	  unsigned int op;
7428 
7429 	  for (op = 0; op < i.operands; op++)
7430 	    if (i.flags[op] & Operand_Mem)
7431 	      break;
7432 	  gas_assert (op < i.operands);
7433 
7434 	  if (i.tm.opcode_modifier.vecsib)
7435 	    {
7436 	      if (i.index_reg->reg_num == RegIZ)
7437 		abort ();
7438 
7439 	      i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7440 	      if (!i.base_reg)
7441 		{
7442 		  i.sib.base = NO_BASE_REGISTER;
7443 		  i.sib.scale = i.log2_scale_factor;
7444 		  i.types[op].bitfield.disp8 = 0;
7445 		  i.types[op].bitfield.disp16 = 0;
7446 		  i.types[op].bitfield.disp64 = 0;
7447 		  if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
7448 		    {
7449 		      /* Must be 32 bit */
7450 		      i.types[op].bitfield.disp32 = 1;
7451 		      i.types[op].bitfield.disp32s = 0;
7452 		    }
7453 		  else
7454 		    {
7455 		      i.types[op].bitfield.disp32 = 0;
7456 		      i.types[op].bitfield.disp32s = 1;
7457 		    }
7458 		}
7459 	      i.sib.index = i.index_reg->reg_num;
7460 	      if ((i.index_reg->reg_flags & RegRex) != 0)
7461 		i.rex |= REX_X;
7462 	      if ((i.index_reg->reg_flags & RegVRex) != 0)
7463 		i.vrex |= REX_X;
7464 	    }
7465 
7466 	  default_seg = &ds;
7467 
7468 	  if (i.base_reg == 0)
7469 	    {
7470 	      i.rm.mode = 0;
7471 	      if (!i.disp_operands)
7472 		fake_zero_displacement = 1;
7473 	      if (i.index_reg == 0)
7474 		{
7475 		  i386_operand_type newdisp;
7476 
7477 		  gas_assert (!i.tm.opcode_modifier.vecsib);
7478 		  /* Operand is just <disp>  */
7479 		  if (flag_code == CODE_64BIT)
7480 		    {
7481 		      /* 64bit mode overwrites the 32bit absolute
7482 			 addressing by RIP relative addressing and
7483 			 absolute addressing is encoded by one of the
7484 			 redundant SIB forms.  */
7485 		      i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7486 		      i.sib.base = NO_BASE_REGISTER;
7487 		      i.sib.index = NO_INDEX_REGISTER;
7488 		      newdisp = (!i.prefix[ADDR_PREFIX] ? disp32s : disp32);
7489 		    }
7490 		  else if ((flag_code == CODE_16BIT)
7491 			   ^ (i.prefix[ADDR_PREFIX] != 0))
7492 		    {
7493 		      i.rm.regmem = NO_BASE_REGISTER_16;
7494 		      newdisp = disp16;
7495 		    }
7496 		  else
7497 		    {
7498 		      i.rm.regmem = NO_BASE_REGISTER;
7499 		      newdisp = disp32;
7500 		    }
7501 		  i.types[op] = operand_type_and_not (i.types[op], anydisp);
7502 		  i.types[op] = operand_type_or (i.types[op], newdisp);
7503 		}
7504 	      else if (!i.tm.opcode_modifier.vecsib)
7505 		{
7506 		  /* !i.base_reg && i.index_reg  */
7507 		  if (i.index_reg->reg_num == RegIZ)
7508 		    i.sib.index = NO_INDEX_REGISTER;
7509 		  else
7510 		    i.sib.index = i.index_reg->reg_num;
7511 		  i.sib.base = NO_BASE_REGISTER;
7512 		  i.sib.scale = i.log2_scale_factor;
7513 		  i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7514 		  i.types[op].bitfield.disp8 = 0;
7515 		  i.types[op].bitfield.disp16 = 0;
7516 		  i.types[op].bitfield.disp64 = 0;
7517 		  if (flag_code != CODE_64BIT || i.prefix[ADDR_PREFIX])
7518 		    {
7519 		      /* Must be 32 bit */
7520 		      i.types[op].bitfield.disp32 = 1;
7521 		      i.types[op].bitfield.disp32s = 0;
7522 		    }
7523 		  else
7524 		    {
7525 		      i.types[op].bitfield.disp32 = 0;
7526 		      i.types[op].bitfield.disp32s = 1;
7527 		    }
7528 		  if ((i.index_reg->reg_flags & RegRex) != 0)
7529 		    i.rex |= REX_X;
7530 		}
7531 	    }
7532 	  /* RIP addressing for 64bit mode.  */
7533 	  else if (i.base_reg->reg_num == RegIP)
7534 	    {
7535 	      gas_assert (!i.tm.opcode_modifier.vecsib);
7536 	      i.rm.regmem = NO_BASE_REGISTER;
7537 	      i.types[op].bitfield.disp8 = 0;
7538 	      i.types[op].bitfield.disp16 = 0;
7539 	      i.types[op].bitfield.disp32 = 0;
7540 	      i.types[op].bitfield.disp32s = 1;
7541 	      i.types[op].bitfield.disp64 = 0;
7542 	      i.flags[op] |= Operand_PCrel;
7543 	      if (! i.disp_operands)
7544 		fake_zero_displacement = 1;
7545 	    }
7546 	  else if (i.base_reg->reg_type.bitfield.word)
7547 	    {
7548 	      gas_assert (!i.tm.opcode_modifier.vecsib);
7549 	      switch (i.base_reg->reg_num)
7550 		{
7551 		case 3: /* (%bx)  */
7552 		  if (i.index_reg == 0)
7553 		    i.rm.regmem = 7;
7554 		  else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
7555 		    i.rm.regmem = i.index_reg->reg_num - 6;
7556 		  break;
7557 		case 5: /* (%bp)  */
7558 		  default_seg = &ss;
7559 		  if (i.index_reg == 0)
7560 		    {
7561 		      i.rm.regmem = 6;
7562 		      if (operand_type_check (i.types[op], disp) == 0)
7563 			{
7564 			  /* fake (%bp) into 0(%bp)  */
7565 			  i.types[op].bitfield.disp8 = 1;
7566 			  fake_zero_displacement = 1;
7567 			}
7568 		    }
7569 		  else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
7570 		    i.rm.regmem = i.index_reg->reg_num - 6 + 2;
7571 		  break;
7572 		default: /* (%si) -> 4 or (%di) -> 5  */
7573 		  i.rm.regmem = i.base_reg->reg_num - 6 + 4;
7574 		}
7575 	      i.rm.mode = mode_from_disp_size (i.types[op]);
7576 	    }
7577 	  else /* i.base_reg and 32/64 bit mode  */
7578 	    {
7579 	      if (flag_code == CODE_64BIT
7580 		  && operand_type_check (i.types[op], disp))
7581 		{
7582 		  i.types[op].bitfield.disp16 = 0;
7583 		  i.types[op].bitfield.disp64 = 0;
7584 		  if (i.prefix[ADDR_PREFIX] == 0)
7585 		    {
7586 		      i.types[op].bitfield.disp32 = 0;
7587 		      i.types[op].bitfield.disp32s = 1;
7588 		    }
7589 		  else
7590 		    {
7591 		      i.types[op].bitfield.disp32 = 1;
7592 		      i.types[op].bitfield.disp32s = 0;
7593 		    }
7594 		}
7595 
7596 	      if (!i.tm.opcode_modifier.vecsib)
7597 		i.rm.regmem = i.base_reg->reg_num;
7598 	      if ((i.base_reg->reg_flags & RegRex) != 0)
7599 		i.rex |= REX_B;
7600 	      i.sib.base = i.base_reg->reg_num;
7601 	      /* x86-64 ignores REX prefix bit here to avoid decoder
7602 		 complications.  */
7603 	      if (!(i.base_reg->reg_flags & RegRex)
7604 		  && (i.base_reg->reg_num == EBP_REG_NUM
7605 		   || i.base_reg->reg_num == ESP_REG_NUM))
7606 		  default_seg = &ss;
7607 	      if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
7608 		{
7609 		  fake_zero_displacement = 1;
7610 		  i.types[op].bitfield.disp8 = 1;
7611 		}
7612 	      i.sib.scale = i.log2_scale_factor;
7613 	      if (i.index_reg == 0)
7614 		{
7615 		  gas_assert (!i.tm.opcode_modifier.vecsib);
7616 		  /* <disp>(%esp) becomes two byte modrm with no index
7617 		     register.  We've already stored the code for esp
7618 		     in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
7619 		     Any base register besides %esp will not use the
7620 		     extra modrm byte.  */
7621 		  i.sib.index = NO_INDEX_REGISTER;
7622 		}
7623 	      else if (!i.tm.opcode_modifier.vecsib)
7624 		{
7625 		  if (i.index_reg->reg_num == RegIZ)
7626 		    i.sib.index = NO_INDEX_REGISTER;
7627 		  else
7628 		    i.sib.index = i.index_reg->reg_num;
7629 		  i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
7630 		  if ((i.index_reg->reg_flags & RegRex) != 0)
7631 		    i.rex |= REX_X;
7632 		}
7633 
7634 	      if (i.disp_operands
7635 		  && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
7636 		      || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
7637 		i.rm.mode = 0;
7638 	      else
7639 		{
7640 		  if (!fake_zero_displacement
7641 		      && !i.disp_operands
7642 		      && i.disp_encoding)
7643 		    {
7644 		      fake_zero_displacement = 1;
7645 		      if (i.disp_encoding == disp_encoding_8bit)
7646 			i.types[op].bitfield.disp8 = 1;
7647 		      else
7648 			i.types[op].bitfield.disp32 = 1;
7649 		    }
7650 		  i.rm.mode = mode_from_disp_size (i.types[op]);
7651 		}
7652 	    }
7653 
7654 	  if (fake_zero_displacement)
7655 	    {
7656 	      /* Fakes a zero displacement assuming that i.types[op]
7657 		 holds the correct displacement size.  */
7658 	      expressionS *exp;
7659 
7660 	      gas_assert (i.op[op].disps == 0);
7661 	      exp = &disp_expressions[i.disp_operands++];
7662 	      i.op[op].disps = exp;
7663 	      exp->X_op = O_constant;
7664 	      exp->X_add_number = 0;
7665 	      exp->X_add_symbol = (symbolS *) 0;
7666 	      exp->X_op_symbol = (symbolS *) 0;
7667 	    }
7668 
7669 	  mem = op;
7670 	}
7671       else
7672 	mem = ~0;
7673 
7674       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
7675 	{
7676 	  if (operand_type_check (i.types[0], imm))
7677 	    i.vex.register_specifier = NULL;
7678 	  else
7679 	    {
7680 	      /* VEX.vvvv encodes one of the sources when the first
7681 		 operand is not an immediate.  */
7682 	      if (i.tm.opcode_modifier.vexw == VEXW0)
7683 		i.vex.register_specifier = i.op[0].regs;
7684 	      else
7685 		i.vex.register_specifier = i.op[1].regs;
7686 	    }
7687 
7688 	  /* Destination is a XMM register encoded in the ModRM.reg
7689 	     and VEX.R bit.  */
7690 	  i.rm.reg = i.op[2].regs->reg_num;
7691 	  if ((i.op[2].regs->reg_flags & RegRex) != 0)
7692 	    i.rex |= REX_R;
7693 
7694 	  /* ModRM.rm and VEX.B encodes the other source.  */
7695 	  if (!i.mem_operands)
7696 	    {
7697 	      i.rm.mode = 3;
7698 
7699 	      if (i.tm.opcode_modifier.vexw == VEXW0)
7700 		i.rm.regmem = i.op[1].regs->reg_num;
7701 	      else
7702 		i.rm.regmem = i.op[0].regs->reg_num;
7703 
7704 	      if ((i.op[1].regs->reg_flags & RegRex) != 0)
7705 		i.rex |= REX_B;
7706 	    }
7707 	}
7708       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
7709 	{
7710 	  i.vex.register_specifier = i.op[2].regs;
7711 	  if (!i.mem_operands)
7712 	    {
7713 	      i.rm.mode = 3;
7714 	      i.rm.regmem = i.op[1].regs->reg_num;
7715 	      if ((i.op[1].regs->reg_flags & RegRex) != 0)
7716 		i.rex |= REX_B;
7717 	    }
7718 	}
7719       /* Fill in i.rm.reg or i.rm.regmem field with register operand
7720 	 (if any) based on i.tm.extension_opcode.  Again, we must be
7721 	 careful to make sure that segment/control/debug/test/MMX
7722 	 registers are coded into the i.rm.reg field.  */
7723       else if (i.reg_operands)
7724 	{
7725 	  unsigned int op;
7726 	  unsigned int vex_reg = ~0;
7727 
7728 	  for (op = 0; op < i.operands; op++)
7729 	    {
7730 	      if (i.types[op].bitfield.class == Reg
7731 		  || i.types[op].bitfield.class == RegBND
7732 		  || i.types[op].bitfield.class == RegMask
7733 		  || i.types[op].bitfield.class == SReg
7734 		  || i.types[op].bitfield.class == RegCR
7735 		  || i.types[op].bitfield.class == RegDR
7736 		  || i.types[op].bitfield.class == RegTR)
7737 		break;
7738 	      if (i.types[op].bitfield.class == RegSIMD)
7739 		{
7740 		  if (i.types[op].bitfield.zmmword)
7741 		    i.has_regzmm = TRUE;
7742 		  else if (i.types[op].bitfield.ymmword)
7743 		    i.has_regymm = TRUE;
7744 		  else
7745 		    i.has_regxmm = TRUE;
7746 		  break;
7747 		}
7748 	      if (i.types[op].bitfield.class == RegMMX)
7749 		{
7750 		  i.has_regmmx = TRUE;
7751 		  break;
7752 		}
7753 	    }
7754 
7755 	  if (vex_3_sources)
7756 	    op = dest;
7757 	  else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
7758 	    {
7759 	      /* For instructions with VexNDS, the register-only
7760 		 source operand is encoded in VEX prefix. */
7761 	      gas_assert (mem != (unsigned int) ~0);
7762 
7763 	      if (op > mem)
7764 		{
7765 		  vex_reg = op++;
7766 		  gas_assert (op < i.operands);
7767 		}
7768 	      else
7769 		{
7770 		  /* Check register-only source operand when two source
7771 		     operands are swapped.  */
7772 		  if (!i.tm.operand_types[op].bitfield.baseindex
7773 		      && i.tm.operand_types[op + 1].bitfield.baseindex)
7774 		    {
7775 		      vex_reg = op;
7776 		      op += 2;
7777 		      gas_assert (mem == (vex_reg + 1)
7778 				  && op < i.operands);
7779 		    }
7780 		  else
7781 		    {
7782 		      vex_reg = op + 1;
7783 		      gas_assert (vex_reg < i.operands);
7784 		    }
7785 		}
7786 	    }
7787 	  else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
7788 	    {
7789 	      /* For instructions with VexNDD, the register destination
7790 		 is encoded in VEX prefix.  */
7791 	      if (i.mem_operands == 0)
7792 		{
7793 		  /* There is no memory operand.  */
7794 		  gas_assert ((op + 2) == i.operands);
7795 		  vex_reg = op + 1;
7796 		}
7797 	      else
7798 		{
7799 		  /* There are only 2 non-immediate operands.  */
7800 		  gas_assert (op < i.imm_operands + 2
7801 			      && i.operands == i.imm_operands + 2);
7802 		  vex_reg = i.imm_operands + 1;
7803 		}
7804 	    }
7805 	  else
7806 	    gas_assert (op < i.operands);
7807 
7808 	  if (vex_reg != (unsigned int) ~0)
7809 	    {
7810 	      i386_operand_type *type = &i.tm.operand_types[vex_reg];
7811 
7812 	      if ((type->bitfield.class != Reg
7813 		   || (!type->bitfield.dword && !type->bitfield.qword))
7814 		  && type->bitfield.class != RegSIMD
7815 		  && !operand_type_equal (type, &regmask))
7816 		abort ();
7817 
7818 	      i.vex.register_specifier = i.op[vex_reg].regs;
7819 	    }
7820 
7821 	  /* Don't set OP operand twice.  */
7822 	  if (vex_reg != op)
7823 	    {
7824 	      /* If there is an extension opcode to put here, the
7825 		 register number must be put into the regmem field.  */
7826 	      if (i.tm.extension_opcode != None)
7827 		{
7828 		  i.rm.regmem = i.op[op].regs->reg_num;
7829 		  if ((i.op[op].regs->reg_flags & RegRex) != 0)
7830 		    i.rex |= REX_B;
7831 		  if ((i.op[op].regs->reg_flags & RegVRex) != 0)
7832 		    i.vrex |= REX_B;
7833 		}
7834 	      else
7835 		{
7836 		  i.rm.reg = i.op[op].regs->reg_num;
7837 		  if ((i.op[op].regs->reg_flags & RegRex) != 0)
7838 		    i.rex |= REX_R;
7839 		  if ((i.op[op].regs->reg_flags & RegVRex) != 0)
7840 		    i.vrex |= REX_R;
7841 		}
7842 	    }
7843 
7844 	  /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
7845 	     must set it to 3 to indicate this is a register operand
7846 	     in the regmem field.  */
7847 	  if (!i.mem_operands)
7848 	    i.rm.mode = 3;
7849 	}
7850 
7851       /* Fill in i.rm.reg field with extension opcode (if any).  */
7852       if (i.tm.extension_opcode != None)
7853 	i.rm.reg = i.tm.extension_opcode;
7854     }
7855   return default_seg;
7856 }
7857 
7858 static unsigned int
7859 flip_code16 (unsigned int code16)
7860 {
7861   gas_assert (i.tm.operands == 1);
7862 
7863   return !(i.prefix[REX_PREFIX] & REX_W)
7864 	 && (code16 ? i.tm.operand_types[0].bitfield.disp32
7865 		      || i.tm.operand_types[0].bitfield.disp32s
7866 		    : i.tm.operand_types[0].bitfield.disp16)
7867 	 ? CODE16 : 0;
7868 }
7869 
7870 static void
7871 output_branch (void)
7872 {
7873   char *p;
7874   int size;
7875   int code16;
7876   int prefix;
7877   relax_substateT subtype;
7878   symbolS *sym;
7879   offsetT off;
7880 
7881   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
7882   size = i.disp_encoding == disp_encoding_32bit ? BIG : SMALL;
7883 
7884   prefix = 0;
7885   if (i.prefix[DATA_PREFIX] != 0)
7886     {
7887       prefix = 1;
7888       i.prefixes -= 1;
7889       code16 ^= flip_code16(code16);
7890     }
7891   /* Pentium4 branch hints.  */
7892   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
7893       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
7894     {
7895       prefix++;
7896       i.prefixes--;
7897     }
7898   if (i.prefix[REX_PREFIX] != 0)
7899     {
7900       prefix++;
7901       i.prefixes--;
7902     }
7903 
7904   /* BND prefixed jump.  */
7905   if (i.prefix[BND_PREFIX] != 0)
7906     {
7907       prefix++;
7908       i.prefixes--;
7909     }
7910 
7911   if (i.prefixes != 0)
7912     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
7913 
7914   /* It's always a symbol;  End frag & setup for relax.
7915      Make sure there is enough room in this frag for the largest
7916      instruction we may generate in md_convert_frag.  This is 2
7917      bytes for the opcode and room for the prefix and largest
7918      displacement.  */
7919   frag_grow (prefix + 2 + 4);
7920   /* Prefix and 1 opcode byte go in fr_fix.  */
7921   p = frag_more (prefix + 1);
7922   if (i.prefix[DATA_PREFIX] != 0)
7923     *p++ = DATA_PREFIX_OPCODE;
7924   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
7925       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
7926     *p++ = i.prefix[SEG_PREFIX];
7927   if (i.prefix[BND_PREFIX] != 0)
7928     *p++ = BND_PREFIX_OPCODE;
7929   if (i.prefix[REX_PREFIX] != 0)
7930     *p++ = i.prefix[REX_PREFIX];
7931   *p = i.tm.base_opcode;
7932 
7933   if ((unsigned char) *p == JUMP_PC_RELATIVE)
7934     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
7935   else if (cpu_arch_flags.bitfield.cpui386)
7936     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
7937   else
7938     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
7939   subtype |= code16;
7940 
7941   sym = i.op[0].disps->X_add_symbol;
7942   off = i.op[0].disps->X_add_number;
7943 
7944   if (i.op[0].disps->X_op != O_constant
7945       && i.op[0].disps->X_op != O_symbol)
7946     {
7947       /* Handle complex expressions.  */
7948       sym = make_expr_symbol (i.op[0].disps);
7949       off = 0;
7950     }
7951 
7952   /* 1 possible extra opcode + 4 byte displacement go in var part.
7953      Pass reloc in fr_var.  */
7954   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
7955 }
7956 
7957 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
7958 /* Return TRUE iff PLT32 relocation should be used for branching to
7959    symbol S.  */
7960 
7961 static bfd_boolean
7962 need_plt32_p (symbolS *s)
7963 {
7964   /* PLT32 relocation is ELF only.  */
7965   if (!IS_ELF)
7966     return FALSE;
7967 
7968 #ifdef TE_SOLARIS
7969   /* Don't emit PLT32 relocation on Solaris: neither native linker nor
7970      krtld support it.  */
7971   return FALSE;
7972 #endif
7973 
7974   /* Since there is no need to prepare for PLT branch on x86-64, we
7975      can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
7976      be used as a marker for 32-bit PC-relative branches.  */
7977   if (!object_64bit)
7978     return FALSE;
7979 
7980   /* Weak or undefined symbol need PLT32 relocation.  */
7981   if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
7982     return TRUE;
7983 
7984   /* Non-global symbol doesn't need PLT32 relocation.  */
7985   if (! S_IS_EXTERNAL (s))
7986     return FALSE;
7987 
7988   /* Other global symbols need PLT32 relocation.  NB: Symbol with
7989      non-default visibilities are treated as normal global symbol
7990      so that PLT32 relocation can be used as a marker for 32-bit
7991      PC-relative branches.  It is useful for linker relaxation.  */
7992   return TRUE;
7993 }
7994 #endif
7995 
7996 static void
7997 output_jump (void)
7998 {
7999   char *p;
8000   int size;
8001   fixS *fixP;
8002   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8003 
8004   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8005     {
8006       /* This is a loop or jecxz type instruction.  */
8007       size = 1;
8008       if (i.prefix[ADDR_PREFIX] != 0)
8009 	{
8010 	  FRAG_APPEND_1_CHAR (ADDR_PREFIX_OPCODE);
8011 	  i.prefixes -= 1;
8012 	}
8013       /* Pentium4 branch hints.  */
8014       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8015 	  || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8016 	{
8017 	  FRAG_APPEND_1_CHAR (i.prefix[SEG_PREFIX]);
8018 	  i.prefixes--;
8019 	}
8020     }
8021   else
8022     {
8023       int code16;
8024 
8025       code16 = 0;
8026       if (flag_code == CODE_16BIT)
8027 	code16 = CODE16;
8028 
8029       if (i.prefix[DATA_PREFIX] != 0)
8030 	{
8031 	  FRAG_APPEND_1_CHAR (DATA_PREFIX_OPCODE);
8032 	  i.prefixes -= 1;
8033 	  code16 ^= flip_code16(code16);
8034 	}
8035 
8036       size = 4;
8037       if (code16)
8038 	size = 2;
8039     }
8040 
8041   /* BND prefixed jump.  */
8042   if (i.prefix[BND_PREFIX] != 0)
8043     {
8044       FRAG_APPEND_1_CHAR (i.prefix[BND_PREFIX]);
8045       i.prefixes -= 1;
8046     }
8047 
8048   if (i.prefix[REX_PREFIX] != 0)
8049     {
8050       FRAG_APPEND_1_CHAR (i.prefix[REX_PREFIX]);
8051       i.prefixes -= 1;
8052     }
8053 
8054   if (i.prefixes != 0)
8055     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8056 
8057   p = frag_more (i.tm.opcode_length + size);
8058   switch (i.tm.opcode_length)
8059     {
8060     case 2:
8061       *p++ = i.tm.base_opcode >> 8;
8062       /* Fall through.  */
8063     case 1:
8064       *p++ = i.tm.base_opcode;
8065       break;
8066     default:
8067       abort ();
8068     }
8069 
8070 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8071   if (size == 4
8072       && jump_reloc == NO_RELOC
8073       && need_plt32_p (i.op[0].disps->X_add_symbol))
8074     jump_reloc = BFD_RELOC_X86_64_PLT32;
8075 #endif
8076 
8077   jump_reloc = reloc (size, 1, 1, jump_reloc);
8078 
8079   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8080 		      i.op[0].disps, 1, jump_reloc);
8081 
8082   /* All jumps handled here are signed, but don't use a signed limit
8083      check for 32 and 16 bit jumps as we want to allow wrap around at
8084      4G and 64k respectively.  */
8085   if (size == 1)
8086     fixP->fx_signed = 1;
8087 }
8088 
8089 static void
8090 output_interseg_jump (void)
8091 {
8092   char *p;
8093   int size;
8094   int prefix;
8095   int code16;
8096 
8097   code16 = 0;
8098   if (flag_code == CODE_16BIT)
8099     code16 = CODE16;
8100 
8101   prefix = 0;
8102   if (i.prefix[DATA_PREFIX] != 0)
8103     {
8104       prefix = 1;
8105       i.prefixes -= 1;
8106       code16 ^= CODE16;
8107     }
8108 
8109   gas_assert (!i.prefix[REX_PREFIX]);
8110 
8111   size = 4;
8112   if (code16)
8113     size = 2;
8114 
8115   if (i.prefixes != 0)
8116     as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8117 
8118   /* 1 opcode; 2 segment; offset  */
8119   p = frag_more (prefix + 1 + 2 + size);
8120 
8121   if (i.prefix[DATA_PREFIX] != 0)
8122     *p++ = DATA_PREFIX_OPCODE;
8123 
8124   if (i.prefix[REX_PREFIX] != 0)
8125     *p++ = i.prefix[REX_PREFIX];
8126 
8127   *p++ = i.tm.base_opcode;
8128   if (i.op[1].imms->X_op == O_constant)
8129     {
8130       offsetT n = i.op[1].imms->X_add_number;
8131 
8132       if (size == 2
8133 	  && !fits_in_unsigned_word (n)
8134 	  && !fits_in_signed_word (n))
8135 	{
8136 	  as_bad (_("16-bit jump out of range"));
8137 	  return;
8138 	}
8139       md_number_to_chars (p, n, size);
8140     }
8141   else
8142     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8143 		 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
8144   if (i.op[0].imms->X_op != O_constant)
8145     as_bad (_("can't handle non absolute segment in `%s'"),
8146 	    i.tm.name);
8147   md_number_to_chars (p + size, (valueT) i.op[0].imms->X_add_number, 2);
8148 }
8149 
8150 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8151 void
8152 x86_cleanup (void)
8153 {
8154   char *p;
8155   asection *seg = now_seg;
8156   subsegT subseg = now_subseg;
8157   asection *sec;
8158   unsigned int alignment, align_size_1;
8159   unsigned int isa_1_descsz, feature_2_descsz, descsz;
8160   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
8161   unsigned int padding;
8162 
8163   if (!IS_ELF || !x86_used_note)
8164     return;
8165 
8166   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
8167 
8168   /* The .note.gnu.property section layout:
8169 
8170      Field	Length		Contents
8171      ----	----		----
8172      n_namsz	4		4
8173      n_descsz	4		The note descriptor size
8174      n_type	4		NT_GNU_PROPERTY_TYPE_0
8175      n_name	4		"GNU"
8176      n_desc	n_descsz	The program property array
8177      ....	....		....
8178    */
8179 
8180   /* Create the .note.gnu.property section.  */
8181   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
8182   bfd_set_section_flags (sec,
8183 			 (SEC_ALLOC
8184 			  | SEC_LOAD
8185 			  | SEC_DATA
8186 			  | SEC_HAS_CONTENTS
8187 			  | SEC_READONLY));
8188 
8189   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
8190     {
8191       align_size_1 = 7;
8192       alignment = 3;
8193     }
8194   else
8195     {
8196       align_size_1 = 3;
8197       alignment = 2;
8198     }
8199 
8200   bfd_set_section_alignment (sec, alignment);
8201   elf_section_type (sec) = SHT_NOTE;
8202 
8203   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
8204 				  + 4-byte data  */
8205   isa_1_descsz_raw = 4 + 4 + 4;
8206   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
8207   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
8208 
8209   feature_2_descsz_raw = isa_1_descsz;
8210   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
8211 				      + 4-byte data  */
8212   feature_2_descsz_raw += 4 + 4 + 4;
8213   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
8214   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
8215 		      & ~align_size_1);
8216 
8217   descsz = feature_2_descsz;
8218   /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
8219   p = frag_more (4 + 4 + 4 + 4 + descsz);
8220 
8221   /* Write n_namsz.  */
8222   md_number_to_chars (p, (valueT) 4, 4);
8223 
8224   /* Write n_descsz.  */
8225   md_number_to_chars (p + 4, (valueT) descsz, 4);
8226 
8227   /* Write n_type.  */
8228   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
8229 
8230   /* Write n_name.  */
8231   memcpy (p + 4 * 3, "GNU", 4);
8232 
8233   /* Write 4-byte type.  */
8234   md_number_to_chars (p + 4 * 4,
8235 		      (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
8236 
8237   /* Write 4-byte data size.  */
8238   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
8239 
8240   /* Write 4-byte data.  */
8241   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
8242 
8243   /* Zero out paddings.  */
8244   padding = isa_1_descsz - isa_1_descsz_raw;
8245   if (padding)
8246     memset (p + 4 * 7, 0, padding);
8247 
8248   /* Write 4-byte type.  */
8249   md_number_to_chars (p + isa_1_descsz + 4 * 4,
8250 		      (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
8251 
8252   /* Write 4-byte data size.  */
8253   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
8254 
8255   /* Write 4-byte data.  */
8256   md_number_to_chars (p + isa_1_descsz + 4 * 6,
8257 		      (valueT) x86_feature_2_used, 4);
8258 
8259   /* Zero out paddings.  */
8260   padding = feature_2_descsz - feature_2_descsz_raw;
8261   if (padding)
8262     memset (p + isa_1_descsz + 4 * 7, 0, padding);
8263 
8264   /* We probably can't restore the current segment, for there likely
8265      isn't one yet...  */
8266   if (seg && subseg)
8267     subseg_set (seg, subseg);
8268 }
8269 #endif
8270 
8271 static unsigned int
8272 encoding_length (const fragS *start_frag, offsetT start_off,
8273 		 const char *frag_now_ptr)
8274 {
8275   unsigned int len = 0;
8276 
8277   if (start_frag != frag_now)
8278     {
8279       const fragS *fr = start_frag;
8280 
8281       do {
8282 	len += fr->fr_fix;
8283 	fr = fr->fr_next;
8284       } while (fr && fr != frag_now);
8285     }
8286 
8287   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
8288 }
8289 
8290 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
8291    be macro-fused with conditional jumps.  */
8292 
8293 static int
8294 maybe_fused_with_jcc_p (void)
8295 {
8296   /* No RIP address.  */
8297   if (i.base_reg && i.base_reg->reg_num == RegIP)
8298     return 0;
8299 
8300   /* No VEX/EVEX encoding.  */
8301   if (is_any_vex_encoding (&i.tm))
8302     return 0;
8303 
8304   /* and, add, sub with destination register.  */
8305   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
8306       || i.tm.base_opcode <= 5
8307       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
8308       || ((i.tm.base_opcode | 3) == 0x83
8309 	  && ((i.tm.extension_opcode | 1) == 0x5
8310 	      || i.tm.extension_opcode == 0x0)))
8311     return (i.types[1].bitfield.class == Reg
8312 	    || i.types[1].bitfield.instance == Accum);
8313 
8314   /* test, cmp with any register.  */
8315   if ((i.tm.base_opcode | 1) == 0x85
8316       || (i.tm.base_opcode | 1) == 0xa9
8317       || ((i.tm.base_opcode | 1) == 0xf7
8318 	  && i.tm.extension_opcode == 0)
8319       || (i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
8320       || ((i.tm.base_opcode | 3) == 0x83
8321 	  && (i.tm.extension_opcode == 0x7)))
8322     return (i.types[0].bitfield.class == Reg
8323 	    || i.types[0].bitfield.instance == Accum
8324 	    || i.types[1].bitfield.class == Reg
8325 	    || i.types[1].bitfield.instance == Accum);
8326 
8327   /* inc, dec with any register.   */
8328   if ((i.tm.cpu_flags.bitfield.cpuno64
8329        && (i.tm.base_opcode | 0xf) == 0x4f)
8330       || ((i.tm.base_opcode | 1) == 0xff
8331 	  && i.tm.extension_opcode <= 0x1))
8332     return (i.types[0].bitfield.class == Reg
8333 	    || i.types[0].bitfield.instance == Accum);
8334 
8335   return 0;
8336 }
8337 
8338 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
8339 
8340 static int
8341 add_fused_jcc_padding_frag_p (void)
8342 {
8343   /* NB: Don't work with COND_JUMP86 without i386.  */
8344   if (!align_branch_power
8345       || now_seg == absolute_section
8346       || !cpu_arch_flags.bitfield.cpui386
8347       || !(align_branch & align_branch_fused_bit))
8348     return 0;
8349 
8350   if (maybe_fused_with_jcc_p ())
8351     {
8352       if (last_insn.kind == last_insn_other
8353 	  || last_insn.seg != now_seg)
8354 	return 1;
8355       if (flag_debug)
8356 	as_warn_where (last_insn.file, last_insn.line,
8357 		       _("`%s` skips -malign-branch-boundary on `%s`"),
8358 		       last_insn.name, i.tm.name);
8359     }
8360 
8361   return 0;
8362 }
8363 
8364 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
8365 
8366 static int
8367 add_branch_prefix_frag_p (void)
8368 {
8369   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
8370      to PadLock instructions since they include prefixes in opcode.  */
8371   if (!align_branch_power
8372       || !align_branch_prefix_size
8373       || now_seg == absolute_section
8374       || i.tm.cpu_flags.bitfield.cpupadlock
8375       || !cpu_arch_flags.bitfield.cpui386)
8376     return 0;
8377 
8378   /* Don't add prefix if it is a prefix or there is no operand in case
8379      that segment prefix is special.  */
8380   if (!i.operands || i.tm.opcode_modifier.isprefix)
8381     return 0;
8382 
8383   if (last_insn.kind == last_insn_other
8384       || last_insn.seg != now_seg)
8385     return 1;
8386 
8387   if (flag_debug)
8388     as_warn_where (last_insn.file, last_insn.line,
8389 		   _("`%s` skips -malign-branch-boundary on `%s`"),
8390 		   last_insn.name, i.tm.name);
8391 
8392   return 0;
8393 }
8394 
8395 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
8396 
8397 static int
8398 add_branch_padding_frag_p (enum align_branch_kind *branch_p)
8399 {
8400   int add_padding;
8401 
8402   /* NB: Don't work with COND_JUMP86 without i386.  */
8403   if (!align_branch_power
8404       || now_seg == absolute_section
8405       || !cpu_arch_flags.bitfield.cpui386)
8406     return 0;
8407 
8408   add_padding = 0;
8409 
8410   /* Check for jcc and direct jmp.  */
8411   if (i.tm.opcode_modifier.jump == JUMP)
8412     {
8413       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
8414 	{
8415 	  *branch_p = align_branch_jmp;
8416 	  add_padding = align_branch & align_branch_jmp_bit;
8417 	}
8418       else
8419 	{
8420 	  *branch_p = align_branch_jcc;
8421 	  if ((align_branch & align_branch_jcc_bit))
8422 	    add_padding = 1;
8423 	}
8424     }
8425   else if (is_any_vex_encoding (&i.tm))
8426     return 0;
8427   else if ((i.tm.base_opcode | 1) == 0xc3)
8428     {
8429       /* Near ret.  */
8430       *branch_p = align_branch_ret;
8431       if ((align_branch & align_branch_ret_bit))
8432 	add_padding = 1;
8433     }
8434   else
8435     {
8436       /* Check for indirect jmp, direct and indirect calls.  */
8437       if (i.tm.base_opcode == 0xe8)
8438 	{
8439 	  /* Direct call.  */
8440 	  *branch_p = align_branch_call;
8441 	  if ((align_branch & align_branch_call_bit))
8442 	    add_padding = 1;
8443 	}
8444       else if (i.tm.base_opcode == 0xff
8445 	       && (i.tm.extension_opcode == 2
8446 		   || i.tm.extension_opcode == 4))
8447 	{
8448 	  /* Indirect call and jmp.  */
8449 	  *branch_p = align_branch_indirect;
8450 	  if ((align_branch & align_branch_indirect_bit))
8451 	    add_padding = 1;
8452 	}
8453 
8454       if (add_padding
8455 	  && i.disp_operands
8456 	  && tls_get_addr
8457 	  && (i.op[0].disps->X_op == O_symbol
8458 	      || (i.op[0].disps->X_op == O_subtract
8459 		  && i.op[0].disps->X_op_symbol == GOT_symbol)))
8460 	{
8461 	  symbolS *s = i.op[0].disps->X_add_symbol;
8462 	  /* No padding to call to global or undefined tls_get_addr.  */
8463 	  if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
8464 	      && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
8465 	    return 0;
8466 	}
8467     }
8468 
8469   if (add_padding
8470       && last_insn.kind != last_insn_other
8471       && last_insn.seg == now_seg)
8472     {
8473       if (flag_debug)
8474 	as_warn_where (last_insn.file, last_insn.line,
8475 		       _("`%s` skips -malign-branch-boundary on `%s`"),
8476 		       last_insn.name, i.tm.name);
8477       return 0;
8478     }
8479 
8480   return add_padding;
8481 }
8482 
8483 static void
8484 output_insn (void)
8485 {
8486   fragS *insn_start_frag;
8487   offsetT insn_start_off;
8488   fragS *fragP = NULL;
8489   enum align_branch_kind branch = align_branch_none;
8490 
8491 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8492   if (IS_ELF && x86_used_note)
8493     {
8494       if (i.tm.cpu_flags.bitfield.cpucmov)
8495 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_CMOV;
8496       if (i.tm.cpu_flags.bitfield.cpusse)
8497 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE;
8498       if (i.tm.cpu_flags.bitfield.cpusse2)
8499 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE2;
8500       if (i.tm.cpu_flags.bitfield.cpusse3)
8501 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE3;
8502       if (i.tm.cpu_flags.bitfield.cpussse3)
8503 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSSE3;
8504       if (i.tm.cpu_flags.bitfield.cpusse4_1)
8505 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_1;
8506       if (i.tm.cpu_flags.bitfield.cpusse4_2)
8507 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_SSE4_2;
8508       if (i.tm.cpu_flags.bitfield.cpuavx)
8509 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX;
8510       if (i.tm.cpu_flags.bitfield.cpuavx2)
8511 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX2;
8512       if (i.tm.cpu_flags.bitfield.cpufma)
8513 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_FMA;
8514       if (i.tm.cpu_flags.bitfield.cpuavx512f)
8515 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512F;
8516       if (i.tm.cpu_flags.bitfield.cpuavx512cd)
8517 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512CD;
8518       if (i.tm.cpu_flags.bitfield.cpuavx512er)
8519 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512ER;
8520       if (i.tm.cpu_flags.bitfield.cpuavx512pf)
8521 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512PF;
8522       if (i.tm.cpu_flags.bitfield.cpuavx512vl)
8523 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512VL;
8524       if (i.tm.cpu_flags.bitfield.cpuavx512dq)
8525 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512DQ;
8526       if (i.tm.cpu_flags.bitfield.cpuavx512bw)
8527 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512BW;
8528       if (i.tm.cpu_flags.bitfield.cpuavx512_4fmaps)
8529 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4FMAPS;
8530       if (i.tm.cpu_flags.bitfield.cpuavx512_4vnniw)
8531 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_4VNNIW;
8532       if (i.tm.cpu_flags.bitfield.cpuavx512_bitalg)
8533 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BITALG;
8534       if (i.tm.cpu_flags.bitfield.cpuavx512ifma)
8535 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_IFMA;
8536       if (i.tm.cpu_flags.bitfield.cpuavx512vbmi)
8537 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI;
8538       if (i.tm.cpu_flags.bitfield.cpuavx512_vbmi2)
8539 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VBMI2;
8540       if (i.tm.cpu_flags.bitfield.cpuavx512_vnni)
8541 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_VNNI;
8542       if (i.tm.cpu_flags.bitfield.cpuavx512_bf16)
8543 	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_AVX512_BF16;
8544 
8545       if (i.tm.cpu_flags.bitfield.cpu8087
8546 	  || i.tm.cpu_flags.bitfield.cpu287
8547 	  || i.tm.cpu_flags.bitfield.cpu387
8548 	  || i.tm.cpu_flags.bitfield.cpu687
8549 	  || i.tm.cpu_flags.bitfield.cpufisttp)
8550 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
8551       if (i.has_regmmx
8552 	  || i.tm.base_opcode == 0xf77 /* emms */
8553 	  || i.tm.base_opcode == 0xf0e /* femms */)
8554 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
8555       if (i.has_regxmm)
8556 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
8557       if (i.has_regymm)
8558 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
8559       if (i.has_regzmm)
8560 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
8561       if (i.tm.cpu_flags.bitfield.cpufxsr)
8562 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
8563       if (i.tm.cpu_flags.bitfield.cpuxsave)
8564 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
8565       if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
8566 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
8567       if (i.tm.cpu_flags.bitfield.cpuxsavec)
8568 	x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
8569     }
8570 #endif
8571 
8572   /* Tie dwarf2 debug info to the address at the start of the insn.
8573      We can't do this after the insn has been output as the current
8574      frag may have been closed off.  eg. by frag_var.  */
8575   dwarf2_emit_insn (0);
8576 
8577   insn_start_frag = frag_now;
8578   insn_start_off = frag_now_fix ();
8579 
8580   if (add_branch_padding_frag_p (&branch))
8581     {
8582       char *p;
8583       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
8584       unsigned int max_branch_padding_size = 14;
8585 
8586       /* Align section to boundary.  */
8587       record_alignment (now_seg, align_branch_power);
8588 
8589       /* Make room for padding.  */
8590       frag_grow (max_branch_padding_size);
8591 
8592       /* Start of the padding.  */
8593       p = frag_more (0);
8594 
8595       fragP = frag_now;
8596 
8597       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
8598 		ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
8599 		NULL, 0, p);
8600 
8601       fragP->tc_frag_data.branch_type = branch;
8602       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
8603     }
8604 
8605   /* Output jumps.  */
8606   if (i.tm.opcode_modifier.jump == JUMP)
8607     output_branch ();
8608   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
8609 	   || i.tm.opcode_modifier.jump == JUMP_DWORD)
8610     output_jump ();
8611   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
8612     output_interseg_jump ();
8613   else
8614     {
8615       /* Output normal instructions here.  */
8616       char *p;
8617       unsigned char *q;
8618       unsigned int j;
8619       unsigned int prefix;
8620 
8621       if (avoid_fence
8622 	  && (i.tm.base_opcode == 0xfaee8
8623 	      || i.tm.base_opcode == 0xfaef0
8624 	      || i.tm.base_opcode == 0xfaef8))
8625         {
8626           /* Encode lfence, mfence, and sfence as
8627              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
8628           offsetT val = 0x240483f0ULL;
8629           p = frag_more (5);
8630           md_number_to_chars (p, val, 5);
8631           return;
8632         }
8633 
8634       /* Some processors fail on LOCK prefix. This options makes
8635 	 assembler ignore LOCK prefix and serves as a workaround.  */
8636       if (omit_lock_prefix)
8637 	{
8638 	  if (i.tm.base_opcode == LOCK_PREFIX_OPCODE)
8639 	    return;
8640 	  i.prefix[LOCK_PREFIX] = 0;
8641 	}
8642 
8643       if (branch)
8644 	/* Skip if this is a branch.  */
8645 	;
8646       else if (add_fused_jcc_padding_frag_p ())
8647 	{
8648 	  /* Make room for padding.  */
8649 	  frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
8650 	  p = frag_more (0);
8651 
8652 	  fragP = frag_now;
8653 
8654 	  frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
8655 		    ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
8656 		    NULL, 0, p);
8657 
8658 	  fragP->tc_frag_data.branch_type = align_branch_fused;
8659 	  fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
8660 	}
8661       else if (add_branch_prefix_frag_p ())
8662 	{
8663 	  unsigned int max_prefix_size = align_branch_prefix_size;
8664 
8665 	  /* Make room for padding.  */
8666 	  frag_grow (max_prefix_size);
8667 	  p = frag_more (0);
8668 
8669 	  fragP = frag_now;
8670 
8671 	  frag_var (rs_machine_dependent, max_prefix_size, 0,
8672 		    ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
8673 		    NULL, 0, p);
8674 
8675 	  fragP->tc_frag_data.max_bytes = max_prefix_size;
8676 	}
8677 
8678       /* Since the VEX/EVEX prefix contains the implicit prefix, we
8679 	 don't need the explicit prefix.  */
8680       if (!i.tm.opcode_modifier.vex && !i.tm.opcode_modifier.evex)
8681 	{
8682 	  switch (i.tm.opcode_length)
8683 	    {
8684 	    case 3:
8685 	      if (i.tm.base_opcode & 0xff000000)
8686 		{
8687 		  prefix = (i.tm.base_opcode >> 24) & 0xff;
8688 		  if (!i.tm.cpu_flags.bitfield.cpupadlock
8689 		      || prefix != REPE_PREFIX_OPCODE
8690 		      || (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE))
8691 		    add_prefix (prefix);
8692 		}
8693 	      break;
8694 	    case 2:
8695 	      if ((i.tm.base_opcode & 0xff0000) != 0)
8696 		{
8697 		  prefix = (i.tm.base_opcode >> 16) & 0xff;
8698 		  add_prefix (prefix);
8699 		}
8700 	      break;
8701 	    case 1:
8702 	      break;
8703 	    case 0:
8704 	      /* Check for pseudo prefixes.  */
8705 	      as_bad_where (insn_start_frag->fr_file,
8706 			    insn_start_frag->fr_line,
8707 			     _("pseudo prefix without instruction"));
8708 	      return;
8709 	    default:
8710 	      abort ();
8711 	    }
8712 
8713 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
8714 	  /* For x32, add a dummy REX_OPCODE prefix for mov/add with
8715 	     R_X86_64_GOTTPOFF relocation so that linker can safely
8716 	     perform IE->LE optimization.  */
8717 	  if (x86_elf_abi == X86_64_X32_ABI
8718 	      && i.operands == 2
8719 	      && i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
8720 	      && i.prefix[REX_PREFIX] == 0)
8721 	    add_prefix (REX_OPCODE);
8722 #endif
8723 
8724 	  /* The prefix bytes.  */
8725 	  for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
8726 	    if (*q)
8727 	      FRAG_APPEND_1_CHAR (*q);
8728 	}
8729       else
8730 	{
8731 	  for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
8732 	    if (*q)
8733 	      switch (j)
8734 		{
8735 		case REX_PREFIX:
8736 		  /* REX byte is encoded in VEX prefix.  */
8737 		  break;
8738 		case SEG_PREFIX:
8739 		case ADDR_PREFIX:
8740 		  FRAG_APPEND_1_CHAR (*q);
8741 		  break;
8742 		default:
8743 		  /* There should be no other prefixes for instructions
8744 		     with VEX prefix.  */
8745 		  abort ();
8746 		}
8747 
8748 	  /* For EVEX instructions i.vrex should become 0 after
8749 	     build_evex_prefix.  For VEX instructions upper 16 registers
8750 	     aren't available, so VREX should be 0.  */
8751 	  if (i.vrex)
8752 	    abort ();
8753 	  /* Now the VEX prefix.  */
8754 	  p = frag_more (i.vex.length);
8755 	  for (j = 0; j < i.vex.length; j++)
8756 	    p[j] = i.vex.bytes[j];
8757 	}
8758 
8759       /* Now the opcode; be careful about word order here!  */
8760       if (i.tm.opcode_length == 1)
8761 	{
8762 	  FRAG_APPEND_1_CHAR (i.tm.base_opcode);
8763 	}
8764       else
8765 	{
8766 	  switch (i.tm.opcode_length)
8767 	    {
8768 	    case 4:
8769 	      p = frag_more (4);
8770 	      *p++ = (i.tm.base_opcode >> 24) & 0xff;
8771 	      *p++ = (i.tm.base_opcode >> 16) & 0xff;
8772 	      break;
8773 	    case 3:
8774 	      p = frag_more (3);
8775 	      *p++ = (i.tm.base_opcode >> 16) & 0xff;
8776 	      break;
8777 	    case 2:
8778 	      p = frag_more (2);
8779 	      break;
8780 	    default:
8781 	      abort ();
8782 	      break;
8783 	    }
8784 
8785 	  /* Put out high byte first: can't use md_number_to_chars!  */
8786 	  *p++ = (i.tm.base_opcode >> 8) & 0xff;
8787 	  *p = i.tm.base_opcode & 0xff;
8788 	}
8789 
8790       /* Now the modrm byte and sib byte (if present).  */
8791       if (i.tm.opcode_modifier.modrm)
8792 	{
8793 	  FRAG_APPEND_1_CHAR ((i.rm.regmem << 0
8794 			       | i.rm.reg << 3
8795 			       | i.rm.mode << 6));
8796 	  /* If i.rm.regmem == ESP (4)
8797 	     && i.rm.mode != (Register mode)
8798 	     && not 16 bit
8799 	     ==> need second modrm byte.  */
8800 	  if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
8801 	      && i.rm.mode != 3
8802 	      && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
8803 	    FRAG_APPEND_1_CHAR ((i.sib.base << 0
8804 				 | i.sib.index << 3
8805 				 | i.sib.scale << 6));
8806 	}
8807 
8808       if (i.disp_operands)
8809 	output_disp (insn_start_frag, insn_start_off);
8810 
8811       if (i.imm_operands)
8812 	output_imm (insn_start_frag, insn_start_off);
8813 
8814       /*
8815        * frag_now_fix () returning plain abs_section_offset when we're in the
8816        * absolute section, and abs_section_offset not getting updated as data
8817        * gets added to the frag breaks the logic below.
8818        */
8819       if (now_seg != absolute_section)
8820 	{
8821 	  j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
8822 	  if (j > 15)
8823 	    as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
8824 		     j);
8825 	  else if (fragP)
8826 	    {
8827 	      /* NB: Don't add prefix with GOTPC relocation since
8828 		 output_disp() above depends on the fixed encoding
8829 		 length.  Can't add prefix with TLS relocation since
8830 		 it breaks TLS linker optimization.  */
8831 	      unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
8832 	      /* Prefix count on the current instruction.  */
8833 	      unsigned int count = i.vex.length;
8834 	      unsigned int k;
8835 	      for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
8836 		/* REX byte is encoded in VEX/EVEX prefix.  */
8837 		if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
8838 		  count++;
8839 
8840 	      /* Count prefixes for extended opcode maps.  */
8841 	      if (!i.vex.length)
8842 		switch (i.tm.opcode_length)
8843 		  {
8844 		  case 3:
8845 		    if (((i.tm.base_opcode >> 16) & 0xff) == 0xf)
8846 		      {
8847 			count++;
8848 			switch ((i.tm.base_opcode >> 8) & 0xff)
8849 			  {
8850 			  case 0x38:
8851 			  case 0x3a:
8852 			    count++;
8853 			    break;
8854 			  default:
8855 			    break;
8856 			  }
8857 		      }
8858 		    break;
8859 		  case 2:
8860 		    if (((i.tm.base_opcode >> 8) & 0xff) == 0xf)
8861 		      count++;
8862 		    break;
8863 		  case 1:
8864 		    break;
8865 		  default:
8866 		    abort ();
8867 		  }
8868 
8869 	      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
8870 		  == BRANCH_PREFIX)
8871 		{
8872 		  /* Set the maximum prefix size in BRANCH_PREFIX
8873 		     frag.  */
8874 		  if (fragP->tc_frag_data.max_bytes > max)
8875 		    fragP->tc_frag_data.max_bytes = max;
8876 		  if (fragP->tc_frag_data.max_bytes > count)
8877 		    fragP->tc_frag_data.max_bytes -= count;
8878 		  else
8879 		    fragP->tc_frag_data.max_bytes = 0;
8880 		}
8881 	      else
8882 		{
8883 		  /* Remember the maximum prefix size in FUSED_JCC_PADDING
8884 		     frag.  */
8885 		  unsigned int max_prefix_size;
8886 		  if (align_branch_prefix_size > max)
8887 		    max_prefix_size = max;
8888 		  else
8889 		    max_prefix_size = align_branch_prefix_size;
8890 		  if (max_prefix_size > count)
8891 		    fragP->tc_frag_data.max_prefix_length
8892 		      = max_prefix_size - count;
8893 		}
8894 
8895 	      /* Use existing segment prefix if possible.  Use CS
8896 		 segment prefix in 64-bit mode.  In 32-bit mode, use SS
8897 		 segment prefix with ESP/EBP base register and use DS
8898 		 segment prefix without ESP/EBP base register.  */
8899 	      if (i.prefix[SEG_PREFIX])
8900 		fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
8901 	      else if (flag_code == CODE_64BIT)
8902 		fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
8903 	      else if (i.base_reg
8904 		       && (i.base_reg->reg_num == 4
8905 			   || i.base_reg->reg_num == 5))
8906 		fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
8907 	      else
8908 		fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
8909 	    }
8910 	}
8911     }
8912 
8913   /* NB: Don't work with COND_JUMP86 without i386.  */
8914   if (align_branch_power
8915       && now_seg != absolute_section
8916       && cpu_arch_flags.bitfield.cpui386)
8917     {
8918       /* Terminate each frag so that we can add prefix and check for
8919          fused jcc.  */
8920       frag_wane (frag_now);
8921       frag_new (0);
8922     }
8923 
8924 #ifdef DEBUG386
8925   if (flag_debug)
8926     {
8927       pi ("" /*line*/, &i);
8928     }
8929 #endif /* DEBUG386  */
8930 }
8931 
8932 /* Return the size of the displacement operand N.  */
8933 
8934 static int
8935 disp_size (unsigned int n)
8936 {
8937   int size = 4;
8938 
8939   if (i.types[n].bitfield.disp64)
8940     size = 8;
8941   else if (i.types[n].bitfield.disp8)
8942     size = 1;
8943   else if (i.types[n].bitfield.disp16)
8944     size = 2;
8945   return size;
8946 }
8947 
8948 /* Return the size of the immediate operand N.  */
8949 
8950 static int
8951 imm_size (unsigned int n)
8952 {
8953   int size = 4;
8954   if (i.types[n].bitfield.imm64)
8955     size = 8;
8956   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
8957     size = 1;
8958   else if (i.types[n].bitfield.imm16)
8959     size = 2;
8960   return size;
8961 }
8962 
8963 static void
8964 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
8965 {
8966   char *p;
8967   unsigned int n;
8968 
8969   for (n = 0; n < i.operands; n++)
8970     {
8971       if (operand_type_check (i.types[n], disp))
8972 	{
8973 	  if (i.op[n].disps->X_op == O_constant)
8974 	    {
8975 	      int size = disp_size (n);
8976 	      offsetT val = i.op[n].disps->X_add_number;
8977 
8978 	      val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
8979 				     size);
8980 	      p = frag_more (size);
8981 	      md_number_to_chars (p, val, size);
8982 	    }
8983 	  else
8984 	    {
8985 	      enum bfd_reloc_code_real reloc_type;
8986 	      int size = disp_size (n);
8987 	      int sign = i.types[n].bitfield.disp32s;
8988 	      int pcrel = (i.flags[n] & Operand_PCrel) != 0;
8989 	      fixS *fixP;
8990 
8991 	      /* We can't have 8 bit displacement here.  */
8992 	      gas_assert (!i.types[n].bitfield.disp8);
8993 
8994 	      /* The PC relative address is computed relative
8995 		 to the instruction boundary, so in case immediate
8996 		 fields follows, we need to adjust the value.  */
8997 	      if (pcrel && i.imm_operands)
8998 		{
8999 		  unsigned int n1;
9000 		  int sz = 0;
9001 
9002 		  for (n1 = 0; n1 < i.operands; n1++)
9003 		    if (operand_type_check (i.types[n1], imm))
9004 		      {
9005 			/* Only one immediate is allowed for PC
9006 			   relative address.  */
9007 			gas_assert (sz == 0);
9008 			sz = imm_size (n1);
9009 			i.op[n].disps->X_add_number -= sz;
9010 		      }
9011 		  /* We should find the immediate.  */
9012 		  gas_assert (sz != 0);
9013 		}
9014 
9015 	      p = frag_more (size);
9016 	      reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
9017 	      if (GOT_symbol
9018 		  && GOT_symbol == i.op[n].disps->X_add_symbol
9019 		  && (((reloc_type == BFD_RELOC_32
9020 			|| reloc_type == BFD_RELOC_X86_64_32S
9021 			|| (reloc_type == BFD_RELOC_64
9022 			    && object_64bit))
9023 		       && (i.op[n].disps->X_op == O_symbol
9024 			   || (i.op[n].disps->X_op == O_add
9025 			       && ((symbol_get_value_expression
9026 				    (i.op[n].disps->X_op_symbol)->X_op)
9027 				   == O_subtract))))
9028 		      || reloc_type == BFD_RELOC_32_PCREL))
9029 		{
9030 		  if (!object_64bit)
9031 		    {
9032 		      reloc_type = BFD_RELOC_386_GOTPC;
9033 		      i.has_gotpc_tls_reloc = TRUE;
9034 		      i.op[n].imms->X_add_number +=
9035 			encoding_length (insn_start_frag, insn_start_off, p);
9036 		    }
9037 		  else if (reloc_type == BFD_RELOC_64)
9038 		    reloc_type = BFD_RELOC_X86_64_GOTPC64;
9039 		  else
9040 		    /* Don't do the adjustment for x86-64, as there
9041 		       the pcrel addressing is relative to the _next_
9042 		       insn, and that is taken care of in other code.  */
9043 		    reloc_type = BFD_RELOC_X86_64_GOTPC32;
9044 		}
9045 	      else if (align_branch_power)
9046 		{
9047 		  switch (reloc_type)
9048 		    {
9049 		    case BFD_RELOC_386_TLS_GD:
9050 		    case BFD_RELOC_386_TLS_LDM:
9051 		    case BFD_RELOC_386_TLS_IE:
9052 		    case BFD_RELOC_386_TLS_IE_32:
9053 		    case BFD_RELOC_386_TLS_GOTIE:
9054 		    case BFD_RELOC_386_TLS_GOTDESC:
9055 		    case BFD_RELOC_386_TLS_DESC_CALL:
9056 		    case BFD_RELOC_X86_64_TLSGD:
9057 		    case BFD_RELOC_X86_64_TLSLD:
9058 		    case BFD_RELOC_X86_64_GOTTPOFF:
9059 		    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
9060 		    case BFD_RELOC_X86_64_TLSDESC_CALL:
9061 		      i.has_gotpc_tls_reloc = TRUE;
9062 		    default:
9063 		      break;
9064 		    }
9065 		}
9066 	      fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
9067 				  size, i.op[n].disps, pcrel,
9068 				  reloc_type);
9069 	      /* Check for "call/jmp *mem", "mov mem, %reg",
9070 		 "test %reg, mem" and "binop mem, %reg" where binop
9071 		 is one of adc, add, and, cmp, or, sbb, sub, xor
9072 		 instructions without data prefix.  Always generate
9073 		 R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
9074 	      if (i.prefix[DATA_PREFIX] == 0
9075 		  && (generate_relax_relocations
9076 		      || (!object_64bit
9077 			  && i.rm.mode == 0
9078 			  && i.rm.regmem == 5))
9079 		  && (i.rm.mode == 2
9080 		      || (i.rm.mode == 0 && i.rm.regmem == 5))
9081 		  && ((i.operands == 1
9082 		       && i.tm.base_opcode == 0xff
9083 		       && (i.rm.reg == 2 || i.rm.reg == 4))
9084 		      || (i.operands == 2
9085 			  && (i.tm.base_opcode == 0x8b
9086 			      || i.tm.base_opcode == 0x85
9087 			      || (i.tm.base_opcode & 0xc7) == 0x03))))
9088 		{
9089 		  if (object_64bit)
9090 		    {
9091 		      fixP->fx_tcbit = i.rex != 0;
9092 		      if (i.base_reg
9093 			  && (i.base_reg->reg_num == RegIP))
9094 		      fixP->fx_tcbit2 = 1;
9095 		    }
9096 		  else
9097 		    fixP->fx_tcbit2 = 1;
9098 		}
9099 	    }
9100 	}
9101     }
9102 }
9103 
9104 static void
9105 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
9106 {
9107   char *p;
9108   unsigned int n;
9109 
9110   for (n = 0; n < i.operands; n++)
9111     {
9112       /* Skip SAE/RC Imm operand in EVEX.  They are already handled.  */
9113       if (i.rounding && (int) n == i.rounding->operand)
9114 	continue;
9115 
9116       if (operand_type_check (i.types[n], imm))
9117 	{
9118 	  if (i.op[n].imms->X_op == O_constant)
9119 	    {
9120 	      int size = imm_size (n);
9121 	      offsetT val;
9122 
9123 	      val = offset_in_range (i.op[n].imms->X_add_number,
9124 				     size);
9125 	      p = frag_more (size);
9126 	      md_number_to_chars (p, val, size);
9127 	    }
9128 	  else
9129 	    {
9130 	      /* Not absolute_section.
9131 		 Need a 32-bit fixup (don't support 8bit
9132 		 non-absolute imms).  Try to support other
9133 		 sizes ...  */
9134 	      enum bfd_reloc_code_real reloc_type;
9135 	      int size = imm_size (n);
9136 	      int sign;
9137 
9138 	      if (i.types[n].bitfield.imm32s
9139 		  && (i.suffix == QWORD_MNEM_SUFFIX
9140 		      || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
9141 		sign = 1;
9142 	      else
9143 		sign = 0;
9144 
9145 	      p = frag_more (size);
9146 	      reloc_type = reloc (size, 0, sign, i.reloc[n]);
9147 
9148 	      /*   This is tough to explain.  We end up with this one if we
9149 	       * have operands that look like
9150 	       * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
9151 	       * obtain the absolute address of the GOT, and it is strongly
9152 	       * preferable from a performance point of view to avoid using
9153 	       * a runtime relocation for this.  The actual sequence of
9154 	       * instructions often look something like:
9155 	       *
9156 	       *	call	.L66
9157 	       * .L66:
9158 	       *	popl	%ebx
9159 	       *	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
9160 	       *
9161 	       *   The call and pop essentially return the absolute address
9162 	       * of the label .L66 and store it in %ebx.  The linker itself
9163 	       * will ultimately change the first operand of the addl so
9164 	       * that %ebx points to the GOT, but to keep things simple, the
9165 	       * .o file must have this operand set so that it generates not
9166 	       * the absolute address of .L66, but the absolute address of
9167 	       * itself.  This allows the linker itself simply treat a GOTPC
9168 	       * relocation as asking for a pcrel offset to the GOT to be
9169 	       * added in, and the addend of the relocation is stored in the
9170 	       * operand field for the instruction itself.
9171 	       *
9172 	       *   Our job here is to fix the operand so that it would add
9173 	       * the correct offset so that %ebx would point to itself.  The
9174 	       * thing that is tricky is that .-.L66 will point to the
9175 	       * beginning of the instruction, so we need to further modify
9176 	       * the operand so that it will point to itself.  There are
9177 	       * other cases where you have something like:
9178 	       *
9179 	       *	.long	$_GLOBAL_OFFSET_TABLE_+[.-.L66]
9180 	       *
9181 	       * and here no correction would be required.  Internally in
9182 	       * the assembler we treat operands of this form as not being
9183 	       * pcrel since the '.' is explicitly mentioned, and I wonder
9184 	       * whether it would simplify matters to do it this way.  Who
9185 	       * knows.  In earlier versions of the PIC patches, the
9186 	       * pcrel_adjust field was used to store the correction, but
9187 	       * since the expression is not pcrel, I felt it would be
9188 	       * confusing to do it this way.  */
9189 
9190 	      if ((reloc_type == BFD_RELOC_32
9191 		   || reloc_type == BFD_RELOC_X86_64_32S
9192 		   || reloc_type == BFD_RELOC_64)
9193 		  && GOT_symbol
9194 		  && GOT_symbol == i.op[n].imms->X_add_symbol
9195 		  && (i.op[n].imms->X_op == O_symbol
9196 		      || (i.op[n].imms->X_op == O_add
9197 			  && ((symbol_get_value_expression
9198 			       (i.op[n].imms->X_op_symbol)->X_op)
9199 			      == O_subtract))))
9200 		{
9201 		  if (!object_64bit)
9202 		    reloc_type = BFD_RELOC_386_GOTPC;
9203 		  else if (size == 4)
9204 		    reloc_type = BFD_RELOC_X86_64_GOTPC32;
9205 		  else if (size == 8)
9206 		    reloc_type = BFD_RELOC_X86_64_GOTPC64;
9207 		  i.has_gotpc_tls_reloc = TRUE;
9208 		  i.op[n].imms->X_add_number +=
9209 		    encoding_length (insn_start_frag, insn_start_off, p);
9210 		}
9211 	      fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9212 			   i.op[n].imms, 0, reloc_type);
9213 	    }
9214 	}
9215     }
9216 }
9217 
9218 /* x86_cons_fix_new is called via the expression parsing code when a
9219    reloc is needed.  We use this hook to get the correct .got reloc.  */
9220 static int cons_sign = -1;
9221 
9222 void
9223 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
9224 		  expressionS *exp, bfd_reloc_code_real_type r)
9225 {
9226   r = reloc (len, 0, cons_sign, r);
9227 
9228 #ifdef TE_PE
9229   if (exp->X_op == O_secrel)
9230     {
9231       exp->X_op = O_symbol;
9232       r = BFD_RELOC_32_SECREL;
9233     }
9234 #endif
9235 
9236   fix_new_exp (frag, off, len, exp, 0, r);
9237 }
9238 
9239 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
9240    purpose of the `.dc.a' internal pseudo-op.  */
9241 
9242 int
9243 x86_address_bytes (void)
9244 {
9245   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
9246     return 4;
9247   return stdoutput->arch_info->bits_per_address / 8;
9248 }
9249 
/* Without ELF or Mach-O support, or when the target defines LEX_AT,
   there is nothing to scan for and lex_got collapses to NULL.  */
#if !(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
    || defined (LEX_AT)
# define lex_got(reloc, adjust, types) NULL
#else
/* Parse operands of the form
   <symbol>@GOTOFF+<nnn>
   and similar .plt or .got references.

   The text scanned starts at input_line_pointer and ends at the first
   end-of-line character or comma.

   If we find one, set up the correct relocation in *REL and copy the
   input string, minus the `@GOTOFF' into a malloc'd buffer for
   parsing by the calling routine.  Return this buffer (the caller
   frees it), and if ADJUST is non-null set it to the length of the
   string we removed from the input line.  If TYPES is non-null it
   receives the operand types the relocation allows: outside 64-bit
   code the imm32 and disp32 bits are set, in 64-bit code *TYPES is
   overwritten with the table entry's types64.

   Otherwise return NULL.  NULL is returned silently when there is no
   `@' or the token after it is not in the table, and after an as_bad
   diagnostic when the token is known but has no relocation for the
   current object format.  */
static char *
lex_got (enum bfd_reloc_code_real *rel,
	 int *adjust,
	 i386_operand_type *types)
{
  /* Some of the relocations depend on the size of what field is to
     be relocated.  But in our callers i386_immediate and i386_displacement
     we don't yet know the operand size (this will be set by insn
     matching).  Hence we record the word32 relocation here,
     and adjust the reloc according to the real size in reloc().  */
  /* The table is searched in order with a prefix match, and the first
     hit wins.  A name that is a prefix of another one (PLT/PLTOFF,
     TLSLD/TLSLDM, GOT/GOTOFF...) must therefore come after it.
     rel[] is indexed by object_64bit; an entry that compares equal to
     zero marks the reloc as unsupported for that format (the table
     uses _dummy_first_bfd_reloc_code_real for this, presumably 0).  */
  static const struct {
    const char *str;
    int len;
    const enum bfd_reloc_code_real rel[2];
    const i386_operand_type types64;
  } gotrel[] = {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
					BFD_RELOC_SIZE32 },
      OPERAND_TYPE_IMM32_64 },
#endif
    { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
				       BFD_RELOC_X86_64_PLTOFF64 },
      OPERAND_TYPE_IMM64 },
    { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
				       BFD_RELOC_X86_64_PLT32    },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
				       BFD_RELOC_X86_64_GOTPLT64 },
      OPERAND_TYPE_IMM64_DISP64 },
    { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
				       BFD_RELOC_X86_64_GOTOFF64 },
      OPERAND_TYPE_IMM64_DISP64 },
    { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
				       BFD_RELOC_X86_64_GOTPCREL },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
				       BFD_RELOC_X86_64_TLSGD    },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
				       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
				       BFD_RELOC_X86_64_TLSLD    },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
				       BFD_RELOC_X86_64_GOTTPOFF },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
				       BFD_RELOC_X86_64_TPOFF32  },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
    { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
				       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
				       BFD_RELOC_X86_64_DTPOFF32 },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
    { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
				       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
				       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE },
    { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
				       BFD_RELOC_X86_64_GOT32    },
      OPERAND_TYPE_IMM32_32S_64_DISP32 },
    { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
				       BFD_RELOC_X86_64_GOTPC32_TLSDESC },
      OPERAND_TYPE_IMM32_32S_DISP32 },
    { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
				       BFD_RELOC_X86_64_TLSDESC_CALL },
      OPERAND_TYPE_IMM32_32S_DISP32 },
  };
  char *cp;
  unsigned int j;

#if defined (OBJ_MAYBE_ELF)
  if (!IS_ELF)
    return NULL;
#endif

  /* Find the `@'; give up if the operand ends first.  */
  for (cp = input_line_pointer; *cp != '@'; cp++)
    if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
      return NULL;

  for (j = 0; j < ARRAY_SIZE (gotrel); j++)
    {
      int len = gotrel[j].len;
      /* Case-insensitive match of the first LEN characters after
	 the `@'.  */
      if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
	{
	  if (gotrel[j].rel[object_64bit] != 0)
	    {
	      int first, second;
	      char *tmpbuf, *past_reloc;

	      *rel = gotrel[j].rel[object_64bit];

	      if (types)
		{
		  if (flag_code != CODE_64BIT)
		    {
		      types->bitfield.imm32 = 1;
		      types->bitfield.disp32 = 1;
		    }
		  else
		    *types = gotrel[j].types64;
		}

	      /* Every entry but the first one makes sure GOT_symbol
		 exists.  NOTE(review): index 0 is "SIZE" only when
		 the ELF entry above is compiled in; otherwise it is
		 "PLTOFF" -- confirm that this is intended.  */
	      if (j != 0 && GOT_symbol == NULL)
		GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);

	      /* The length of the first part of our input line.  */
	      first = cp - input_line_pointer;

	      /* The second part goes from after the reloc token until
		 (and including) an end_of_line char or comma.  */
	      past_reloc = cp + 1 + len;
	      cp = past_reloc;
	      while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
		++cp;
	      second = cp + 1 - past_reloc;

	      /* Allocate and copy string.  The trailing NUL shouldn't
		 be necessary, but be safe.  */
	      tmpbuf = XNEWVEC (char, first + second + 2);
	      memcpy (tmpbuf, input_line_pointer, first);
	      if (second != 0 && *past_reloc != ' ')
		/* Replace the relocation token with ' ', so that
		   errors like foo@GOTOFF1 will be detected.  */
		tmpbuf[first++] = ' ';
	      else
		/* Increment length by 1 if the relocation token is
		   removed.  */
		len++;
	      /* LEN is now the number of characters by which the copy
		 is shorter than the original text.  */
	      if (adjust)
		*adjust = len;
	      memcpy (tmpbuf + first, past_reloc, second);
	      tmpbuf[first + second] = '\0';
	      return tmpbuf;
	    }

	  /* Known token, but no relocation for this object format.  */
	  as_bad (_("@%s reloc is not supported with %d-bit output format"),
		  gotrel[j].str, 1 << (5 + object_64bit));
	  return NULL;
	}
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}
#endif
9414 
#ifdef TE_PE
/* PE targets use the definition below in place of any lex_got macro
   defined earlier.  */
#ifdef lex_got
#undef lex_got
#endif
/* Parse operands of the form
   <symbol>@SECREL32+<nnn>

   The text scanned starts at input_line_pointer and ends at the first
   end-of-line character or comma.

   If we find one, set up the correct relocation in *REL and copy the
   input string, minus the `@SECREL32' into a malloc'd buffer for
   parsing by the calling routine.  Return this buffer (the caller
   frees it), and if ADJUST is non-null set it to the length of the
   string we removed from the input line.  If TYPES is non-null it
   receives the operand types the relocation allows: outside 64-bit
   code the imm32 and disp32 bits are set, in 64-bit code *TYPES is
   overwritten with the table entry's types64.  Otherwise return NULL.

   This function is copied from the ELF version above adjusted for PE targets.  */

static char *
lex_got (enum bfd_reloc_code_real *rel ATTRIBUTE_UNUSED,
	 int *adjust ATTRIBUTE_UNUSED,
	 i386_operand_type *types)
{
  /* rel[] is indexed by object_64bit.  */
  static const struct
  {
    const char *str;
    int len;
    const enum bfd_reloc_code_real rel[2];
    const i386_operand_type types64;
  }
  gotrel[] =
  {
    { STRING_COMMA_LEN ("SECREL32"),    { BFD_RELOC_32_SECREL,
					  BFD_RELOC_32_SECREL },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64 },
  };

  char *cp;
  unsigned j;

  /* Find the `@'; give up if the operand ends first.  */
  for (cp = input_line_pointer; *cp != '@'; cp++)
    if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
      return NULL;

  for (j = 0; j < ARRAY_SIZE (gotrel); j++)
    {
      int len = gotrel[j].len;

      /* Case-insensitive match of the first LEN characters after
	 the `@'.  */
      if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
	{
	  if (gotrel[j].rel[object_64bit] != 0)
	    {
	      int first, second;
	      char *tmpbuf, *past_reloc;

	      *rel = gotrel[j].rel[object_64bit];
	      /* NOTE(review): ADJUST is always the bare token length
		 here, whether or not a ' ' replaces the token below --
		 confirm this is what the PE callers expect.  */
	      if (adjust)
		*adjust = len;

	      if (types)
		{
		  if (flag_code != CODE_64BIT)
		    {
		      types->bitfield.imm32 = 1;
		      types->bitfield.disp32 = 1;
		    }
		  else
		    *types = gotrel[j].types64;
		}

	      /* The length of the first part of our input line.  */
	      first = cp - input_line_pointer;

	      /* The second part goes from after the reloc token until
		 (and including) an end_of_line char or comma.  */
	      past_reloc = cp + 1 + len;
	      cp = past_reloc;
	      while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
		++cp;
	      second = cp + 1 - past_reloc;

	      /* Allocate and copy string.  The trailing NUL shouldn't
		 be necessary, but be safe.  */
	      tmpbuf = XNEWVEC (char, first + second + 2);
	      memcpy (tmpbuf, input_line_pointer, first);
	      if (second != 0 && *past_reloc != ' ')
		/* Replace the relocation token with ' ', so that
		   errors like foo@SECREL321 will be detected.  */
		tmpbuf[first++] = ' ';
	      memcpy (tmpbuf + first, past_reloc, second);
	      tmpbuf[first + second] = '\0';
	      return tmpbuf;
	    }

	  /* Known token, but no relocation for this object format.  */
	  as_bad (_("@%s reloc is not supported with %d-bit output format"),
		  gotrel[j].str, 1 << (5 + object_64bit));
	  return NULL;
	}
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}

#endif /* TE_PE */
9517 
9518 bfd_reloc_code_real_type
9519 x86_cons (expressionS *exp, int size)
9520 {
9521   bfd_reloc_code_real_type got_reloc = NO_RELOC;
9522 
9523   intel_syntax = -intel_syntax;
9524 
9525   exp->X_md = 0;
9526   if (size == 4 || (object_64bit && size == 8))
9527     {
9528       /* Handle @GOTOFF and the like in an expression.  */
9529       char *save;
9530       char *gotfree_input_line;
9531       int adjust = 0;
9532 
9533       save = input_line_pointer;
9534       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
9535       if (gotfree_input_line)
9536 	input_line_pointer = gotfree_input_line;
9537 
9538       expression (exp);
9539 
9540       if (gotfree_input_line)
9541 	{
9542 	  /* expression () has merrily parsed up to the end of line,
9543 	     or a comma - in the wrong buffer.  Transfer how far
9544 	     input_line_pointer has moved to the right buffer.  */
9545 	  input_line_pointer = (save
9546 				+ (input_line_pointer - gotfree_input_line)
9547 				+ adjust);
9548 	  free (gotfree_input_line);
9549 	  if (exp->X_op == O_constant
9550 	      || exp->X_op == O_absent
9551 	      || exp->X_op == O_illegal
9552 	      || exp->X_op == O_register
9553 	      || exp->X_op == O_big)
9554 	    {
9555 	      char c = *input_line_pointer;
9556 	      *input_line_pointer = 0;
9557 	      as_bad (_("missing or invalid expression `%s'"), save);
9558 	      *input_line_pointer = c;
9559 	    }
9560 	  else if ((got_reloc == BFD_RELOC_386_PLT32
9561 		    || got_reloc == BFD_RELOC_X86_64_PLT32)
9562 		   && exp->X_op != O_symbol)
9563 	    {
9564 	      char c = *input_line_pointer;
9565 	      *input_line_pointer = 0;
9566 	      as_bad (_("invalid PLT expression `%s'"), save);
9567 	      *input_line_pointer = c;
9568 	    }
9569 	}
9570     }
9571   else
9572     expression (exp);
9573 
9574   intel_syntax = -intel_syntax;
9575 
9576   if (intel_syntax)
9577     i386_intel_simplify (exp);
9578 
9579   return got_reloc;
9580 }
9581 
9582 static void
9583 signed_cons (int size)
9584 {
9585   if (flag_code == CODE_64BIT)
9586     cons_sign = 1;
9587   cons (size);
9588   cons_sign = -1;
9589 }
9590 
#ifdef TE_PE
/* Pseudo-op handler for PE targets: parse a comma separated list of
   expressions and emit each one as a 4 byte value.  An expression
   that is a plain symbol is turned into an O_secrel one first.  The
   argument is not used.  */

static void
pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
{
  expressionS value;

  for (;;)
    {
      expression (&value);
      if (value.X_op == O_symbol)
	value.X_op = O_secrel;

      emit_expr (&value, 4);

      /* Another expression follows only after a comma.  */
      if (*input_line_pointer != ',')
	break;
      ++input_line_pointer;
    }

  demand_empty_rest_of_line ();
}
#endif
9611 
/* Handle Vector operations.

   Parse the run of `{...}' groups starting at OP_STRING.  Parsing
   stops at the terminating NUL or, when OP_END is non-NULL, once
   OP_END is reached.  Three kinds of group are recognised:

     {1to2} {1to4} {1to8} {1to16}	broadcast
     {<mask register>}			write mask
     {z}				zeroing-masking

   The result is recorded for the operand numbered this_operand in
   broadcast_op / i.broadcast and mask_op / i.mask.

   Return a pointer just past the last group parsed, or NULL after an
   as_bad diagnostic for a malformed, duplicated or unknown group.  */

static char *
check_VecOperations (char *op_string, char *op_end)
{
  const reg_entry *mask;
  /* Start of the group being parsed; used in diagnostics.  */
  const char *saved;
  char *end_op;

  while (*op_string
	 && (op_end == NULL || op_string < op_end))
    {
      saved = op_string;
      if (*op_string == '{')
	{
	  op_string++;

	  /* Check broadcasts.  */
	  if (strncmp (op_string, "1to", 3) == 0)
	    {
	      int bcst_type;

	      if (i.broadcast)
		goto duplicated_vec_op;

	      op_string += 3;
	      if (*op_string == '8')
		bcst_type = 8;
	      else if (*op_string == '4')
		bcst_type = 4;
	      else if (*op_string == '2')
		bcst_type = 2;
	      else if (*op_string == '1'
		       && *(op_string+1) == '6')
		{
		  bcst_type = 16;
		  /* Skip the '1'; the '6' is skipped below.  */
		  op_string++;
		}
	      else
		{
		  as_bad (_("Unsupported broadcast: `%s'"), saved);
		  return NULL;
		}
	      op_string++;

	      broadcast_op.type = bcst_type;
	      broadcast_op.operand = this_operand;
	      broadcast_op.bytes = 0;
	      i.broadcast = &broadcast_op;
	    }
	  /* Check masking operation.  */
	  else if ((mask = parse_register (op_string, &end_op)) != NULL)
	    {
	      /* k0 can't be used for write mask.  */
	      if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
		{
		  as_bad (_("`%s%s' can't be used for write mask"),
			  register_prefix, mask->reg_name);
		  return NULL;
		}

	      if (!i.mask)
		{
		  mask_op.mask = mask;
		  mask_op.zeroing = 0;
		  mask_op.operand = this_operand;
		  i.mask = &mask_op;
		}
	      else
		{
		  /* i.mask was already set up, either by an earlier
		     "{z}" or by an earlier mask register.  */
		  if (i.mask->mask)
		    goto duplicated_vec_op;

		  i.mask->mask = mask;

		  /* Only "{z}" is allowed here.  No need to check
		     zeroing mask explicitly.  */
		  if (i.mask->operand != this_operand)
		    {
		      as_bad (_("invalid write mask `%s'"), saved);
		      return NULL;
		    }
		}

	      op_string = end_op;
	    }
	  /* Check zeroing-flag for masking operation.  */
	  else if (*op_string == 'z')
	    {
	      if (!i.mask)
		{
		  /* "{z}" seen before any mask register; the check
		     after the loop rejects it if none follows.  */
		  mask_op.mask = NULL;
		  mask_op.zeroing = 1;
		  mask_op.operand = this_operand;
		  i.mask = &mask_op;
		}
	      else
		{
		  if (i.mask->zeroing)
		    {
		      /* Shared by the broadcast and write mask
			 branches above via goto.  */
		    duplicated_vec_op:
		      as_bad (_("duplicated `%s'"), saved);
		      return NULL;
		    }

		  i.mask->zeroing = 1;

		  /* Only "{%k}" is allowed here.  No need to check mask
		     register explicitly.  */
		  if (i.mask->operand != this_operand)
		    {
		      as_bad (_("invalid zeroing-masking `%s'"),
			      saved);
		      return NULL;
		    }
		}

	      op_string++;
	    }
	  else
	    goto unknown_vec_op;

	  if (*op_string != '}')
	    {
	      as_bad (_("missing `}' in `%s'"), saved);
	      return NULL;
	    }
	  op_string++;

	  /* Strip whitespace since the addition of pseudo prefixes
	     changed how the scrubber treats '{'.  */
	  if (is_space_char (*op_string))
	    ++op_string;

	  continue;
	}
      /* Reached for text that does not start with '{', and by goto
	 for an unrecognised group.  */
    unknown_vec_op:
      /* We don't know this one.  */
      as_bad (_("unknown vector operation: `%s'"), saved);
      return NULL;
    }

  if (i.mask && i.mask->zeroing && !i.mask->mask)
    {
      as_bad (_("zeroing-masking only allowed with write mask"));
      return NULL;
    }

  return op_string;
}
9762 
9763 static int
9764 i386_immediate (char *imm_start)
9765 {
9766   char *save_input_line_pointer;
9767   char *gotfree_input_line;
9768   segT exp_seg = 0;
9769   expressionS *exp;
9770   i386_operand_type types;
9771 
9772   operand_type_set (&types, ~0);
9773 
9774   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
9775     {
9776       as_bad (_("at most %d immediate operands are allowed"),
9777 	      MAX_IMMEDIATE_OPERANDS);
9778       return 0;
9779     }
9780 
9781   exp = &im_expressions[i.imm_operands++];
9782   i.op[this_operand].imms = exp;
9783 
9784   if (is_space_char (*imm_start))
9785     ++imm_start;
9786 
9787   save_input_line_pointer = input_line_pointer;
9788   input_line_pointer = imm_start;
9789 
9790   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
9791   if (gotfree_input_line)
9792     input_line_pointer = gotfree_input_line;
9793 
9794   exp_seg = expression (exp);
9795 
9796   SKIP_WHITESPACE ();
9797 
9798   /* Handle vector operations.  */
9799   if (*input_line_pointer == '{')
9800     {
9801       input_line_pointer = check_VecOperations (input_line_pointer,
9802 						NULL);
9803       if (input_line_pointer == NULL)
9804 	return 0;
9805     }
9806 
9807   if (*input_line_pointer)
9808     as_bad (_("junk `%s' after expression"), input_line_pointer);
9809 
9810   input_line_pointer = save_input_line_pointer;
9811   if (gotfree_input_line)
9812     {
9813       free (gotfree_input_line);
9814 
9815       if (exp->X_op == O_constant || exp->X_op == O_register)
9816 	exp->X_op = O_illegal;
9817     }
9818 
9819   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
9820 }
9821 
9822 static int
9823 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
9824 			 i386_operand_type types, const char *imm_start)
9825 {
9826   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
9827     {
9828       if (imm_start)
9829 	as_bad (_("missing or invalid immediate expression `%s'"),
9830 		imm_start);
9831       return 0;
9832     }
9833   else if (exp->X_op == O_constant)
9834     {
9835       /* Size it properly later.  */
9836       i.types[this_operand].bitfield.imm64 = 1;
9837       /* If not 64bit, sign extend val.  */
9838       if (flag_code != CODE_64BIT
9839 	  && (exp->X_add_number & ~(((addressT) 2 << 31) - 1)) == 0)
9840 	exp->X_add_number
9841 	  = (exp->X_add_number ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
9842     }
9843 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
9844   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
9845 	   && exp_seg != absolute_section
9846 	   && exp_seg != text_section
9847 	   && exp_seg != data_section
9848 	   && exp_seg != bss_section
9849 	   && exp_seg != undefined_section
9850 	   && !bfd_is_com_section (exp_seg))
9851     {
9852       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
9853       return 0;
9854     }
9855 #endif
9856   else if (!intel_syntax && exp_seg == reg_section)
9857     {
9858       if (imm_start)
9859 	as_bad (_("illegal immediate register operand %s"), imm_start);
9860       return 0;
9861     }
9862   else
9863     {
9864       /* This is an address.  The size of the address will be
9865 	 determined later, depending on destination register,
9866 	 suffix, or the default for the section.  */
9867       i.types[this_operand].bitfield.imm8 = 1;
9868       i.types[this_operand].bitfield.imm16 = 1;
9869       i.types[this_operand].bitfield.imm32 = 1;
9870       i.types[this_operand].bitfield.imm32s = 1;
9871       i.types[this_operand].bitfield.imm64 = 1;
9872       i.types[this_operand] = operand_type_and (i.types[this_operand],
9873 						types);
9874     }
9875 
9876   return 1;
9877 }
9878 
9879 static char *
9880 i386_scale (char *scale)
9881 {
9882   offsetT val;
9883   char *save = input_line_pointer;
9884 
9885   input_line_pointer = scale;
9886   val = get_absolute_expression ();
9887 
9888   switch (val)
9889     {
9890     case 1:
9891       i.log2_scale_factor = 0;
9892       break;
9893     case 2:
9894       i.log2_scale_factor = 1;
9895       break;
9896     case 4:
9897       i.log2_scale_factor = 2;
9898       break;
9899     case 8:
9900       i.log2_scale_factor = 3;
9901       break;
9902     default:
9903       {
9904 	char sep = *input_line_pointer;
9905 
9906 	*input_line_pointer = '\0';
9907 	as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
9908 		scale);
9909 	*input_line_pointer = sep;
9910 	input_line_pointer = save;
9911 	return NULL;
9912       }
9913     }
9914   if (i.log2_scale_factor != 0 && i.index_reg == 0)
9915     {
9916       as_warn (_("scale factor of %d without an index register"),
9917 	       1 << i.log2_scale_factor);
9918       i.log2_scale_factor = 0;
9919     }
9920   scale = input_line_pointer;
9921   input_line_pointer = save;
9922   return scale;
9923 }
9924 
/* Parse the displacement expression occupying [DISP_START, DISP_END)
   and attach it to operand THIS_OPERAND.

   First the displacement widths the operand may use are worked out
   from the code size, the prefixes and (for PC-relative branches) the
   mnemonic suffix, and are or'ed into i.types[this_operand].  Then the
   text is parsed with expression () into the next free
   disp_expressions slot and checked by i386_finalize_displacement.

   Returns 0 if no displacement slot is left, otherwise whatever
   i386_finalize_displacement returns.  */

static int
i386_displacement (char *disp_start, char *disp_end)
{
  expressionS *exp;
  segT exp_seg = 0;
  char *save_input_line_pointer;
  char *gotfree_input_line;
  int override;
  i386_operand_type bigdisp, types = anydisp;
  int ret;

  if (i.disp_operands == MAX_MEMORY_OPERANDS)
    {
      as_bad (_("at most %d displacement operands are allowed"),
	      MAX_MEMORY_OPERANDS);
      return 0;
    }

  operand_type_set (&bigdisp, 0);
  /* Absolute jumps, base/index operands and everything whose first
     template is not a JUMP or JUMP_DWORD insn: the displacement width
     follows the address size.  */
  if (i.jumpabsolute
      || i.types[this_operand].bitfield.baseindex
      || (current_templates->start->opcode_modifier.jump != JUMP
	  && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
    {
      /* Called for its side effect of possibly setting the address
	 size prefix; the returned mode is not needed here.  */
      i386_addressing_mode ();
      override = (i.prefix[ADDR_PREFIX] != 0);
      if (flag_code == CODE_64BIT)
	{
	  if (!override)
	    {
	      bigdisp.bitfield.disp32s = 1;
	      bigdisp.bitfield.disp64 = 1;
	    }
	  else
	    bigdisp.bitfield.disp32 = 1;
	}
      else if ((flag_code == CODE_16BIT) ^ override)
	  bigdisp.bitfield.disp16 = 1;
      else
	  bigdisp.bitfield.disp32 = 1;
    }
  else
    {
      /* For PC-relative branches, the width of the displacement may be
	 dependent upon data size, but is never dependent upon address size.
	 Also make sure to not unintentionally match against a non-PC-relative
	 branch template.  */
      static templates aux_templates;
      const insn_template *t = current_templates->start;
      bfd_boolean has_intel64 = FALSE;

      /* Walk the leading run of templates whose jump kind equals that
	 of the first one, noting whether any template after the first
	 in that run has the intel64 modifier set.  */
      aux_templates.start = t;
      while (++t < current_templates->end)
	{
	  if (t->opcode_modifier.jump
	      != current_templates->start->opcode_modifier.jump)
	    break;
	  if (t->opcode_modifier.intel64)
	    has_intel64 = TRUE;
	}
      /* If the run ended early, restrict matching to just that run.  */
      if (t < current_templates->end)
	{
	  aux_templates.end = t;
	  current_templates = &aux_templates;
	}

      override = (i.prefix[DATA_PREFIX] != 0);
      if (flag_code == CODE_64BIT)
	{
	  if ((override || i.suffix == WORD_MNEM_SUFFIX)
	      && (!intel64 || !has_intel64))
	    bigdisp.bitfield.disp16 = 1;
	  else
	    bigdisp.bitfield.disp32s = 1;
	}
      else
	{
	  /* Without a data size prefix, a suffix selecting the
	     non-default operand size acts as the override.  */
	  if (!override)
	    override = (i.suffix == (flag_code != CODE_16BIT
				     ? WORD_MNEM_SUFFIX
				     : LONG_MNEM_SUFFIX));
	  bigdisp.bitfield.disp32 = 1;
	  if ((flag_code == CODE_16BIT) ^ override)
	    {
	      bigdisp.bitfield.disp32 = 0;
	      bigdisp.bitfield.disp16 = 1;
	    }
	}
    }
  i.types[this_operand] = operand_type_or (i.types[this_operand],
					   bigdisp);

  exp = &disp_expressions[i.disp_operands];
  i.op[this_operand].disps = exp;
  i.disp_operands++;
  save_input_line_pointer = input_line_pointer;
  input_line_pointer = disp_start;
  /* Paired with RESTORE_END_STRING (disp_end) at the bottom;
     presumably this temporarily terminates the text at DISP_END --
     confirm against the macro definitions.  */
  END_STRING_AND_SAVE (disp_end);

#ifndef GCC_ASM_O_HACK
#define GCC_ASM_O_HACK 0
#endif
  /* NOTE(review): the hack below is compiled out by default.  It refers
     to `displacement_string_end', which is not declared in this
     function, and spells the bitfield `baseIndex' whereas the rest of
     this code uses `baseindex'; it would need attention before being
     enabled.  */
#if GCC_ASM_O_HACK
  END_STRING_AND_SAVE (disp_end + 1);
  if (i.types[this_operand].bitfield.baseIndex
      && displacement_string_end[-1] == '+')
    {
      /* This hack is to avoid a warning when using the "o"
	 constraint within gcc asm statements.
	 For instance:

	 #define _set_tssldt_desc(n,addr,limit,type) \
	 __asm__ __volatile__ ( \
	 "movw %w2,%0\n\t" \
	 "movw %w1,2+%0\n\t" \
	 "rorl $16,%1\n\t" \
	 "movb %b1,4+%0\n\t" \
	 "movb %4,5+%0\n\t" \
	 "movb $0,6+%0\n\t" \
	 "movb %h1,7+%0\n\t" \
	 "rorl $16,%1" \
	 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))

	 This works great except that the output assembler ends
	 up looking a bit weird if it turns out that there is
	 no offset.  You end up producing code that looks like:

	 #APP
	 movw $235,(%eax)
	 movw %dx,2+(%eax)
	 rorl $16,%edx
	 movb %dl,4+(%eax)
	 movb $137,5+(%eax)
	 movb $0,6+(%eax)
	 movb %dh,7+(%eax)
	 rorl $16,%edx
	 #NO_APP

	 So here we provide the missing zero.  */

      *displacement_string_end = '0';
    }
#endif
  /* When lex_got hands back a buffer, parse that instead of the
     original text; the buffer is freed below.  */
  gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
  if (gotfree_input_line)
    input_line_pointer = gotfree_input_line;

  exp_seg = expression (exp);

  SKIP_WHITESPACE ();
  /* Trailing text is diagnosed but parsing carries on to finalization.  */
  if (*input_line_pointer)
    as_bad (_("junk `%s' after expression"), input_line_pointer);
#if GCC_ASM_O_HACK
  RESTORE_END_STRING (disp_end + 1);
#endif
  input_line_pointer = save_input_line_pointer;
  if (gotfree_input_line)
    {
      free (gotfree_input_line);

      /* A plain constant or register is not acceptable once lex_got
	 has rewritten the operand; mark it so that finalization
	 rejects it.  */
      if (exp->X_op == O_constant || exp->X_op == O_register)
	exp->X_op = O_illegal;
    }

  /* Finalize before restoring the end of the string, so DISP_START
     still reads as just the displacement text in any diagnostic.  */
  ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);

  RESTORE_END_STRING (disp_end);

  return ret;
}
10095 
10096 static int
10097 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10098 			    i386_operand_type types, const char *disp_start)
10099 {
10100   i386_operand_type bigdisp;
10101   int ret = 1;
10102 
10103   /* We do this to make sure that the section symbol is in
10104      the symbol table.  We will ultimately change the relocation
10105      to be relative to the beginning of the section.  */
10106   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
10107       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
10108       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10109     {
10110       if (exp->X_op != O_symbol)
10111 	goto inv_disp;
10112 
10113       if (S_IS_LOCAL (exp->X_add_symbol)
10114 	  && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
10115 	  && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
10116 	section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
10117       exp->X_op = O_subtract;
10118       exp->X_op_symbol = GOT_symbol;
10119       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
10120 	i.reloc[this_operand] = BFD_RELOC_32_PCREL;
10121       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
10122 	i.reloc[this_operand] = BFD_RELOC_64;
10123       else
10124 	i.reloc[this_operand] = BFD_RELOC_32;
10125     }
10126 
10127   else if (exp->X_op == O_absent
10128 	   || exp->X_op == O_illegal
10129 	   || exp->X_op == O_big)
10130     {
10131     inv_disp:
10132       as_bad (_("missing or invalid displacement expression `%s'"),
10133 	      disp_start);
10134       ret = 0;
10135     }
10136 
10137   else if (flag_code == CODE_64BIT
10138 	   && !i.prefix[ADDR_PREFIX]
10139 	   && exp->X_op == O_constant)
10140     {
10141       /* Since displacement is signed extended to 64bit, don't allow
10142 	 disp32 and turn off disp32s if they are out of range.  */
10143       i.types[this_operand].bitfield.disp32 = 0;
10144       if (!fits_in_signed_long (exp->X_add_number))
10145 	{
10146 	  i.types[this_operand].bitfield.disp32s = 0;
10147 	  if (i.types[this_operand].bitfield.baseindex)
10148 	    {
10149 	      as_bad (_("0x%lx out range of signed 32bit displacement"),
10150 		      (long) exp->X_add_number);
10151 	      ret = 0;
10152 	    }
10153 	}
10154     }
10155 
10156 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10157   else if (exp->X_op != O_constant
10158 	   && OUTPUT_FLAVOR == bfd_target_aout_flavour
10159 	   && exp_seg != absolute_section
10160 	   && exp_seg != text_section
10161 	   && exp_seg != data_section
10162 	   && exp_seg != bss_section
10163 	   && exp_seg != undefined_section
10164 	   && !bfd_is_com_section (exp_seg))
10165     {
10166       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10167       ret = 0;
10168     }
10169 #endif
10170 
10171   if (current_templates->start->opcode_modifier.jump == JUMP_BYTE
10172       /* Constants get taken care of by optimize_disp().  */
10173       && exp->X_op != O_constant)
10174     i.types[this_operand].bitfield.disp8 = 1;
10175 
10176   /* Check if this is a displacement only operand.  */
10177   bigdisp = i.types[this_operand];
10178   bigdisp.bitfield.disp8 = 0;
10179   bigdisp.bitfield.disp16 = 0;
10180   bigdisp.bitfield.disp32 = 0;
10181   bigdisp.bitfield.disp32s = 0;
10182   bigdisp.bitfield.disp64 = 0;
10183   if (operand_type_all_zero (&bigdisp))
10184     i.types[this_operand] = operand_type_and (i.types[this_operand],
10185 					      types);
10186 
10187   return ret;
10188 }
10189 
10190 /* Return the active addressing mode, taking address override and
10191    registers forming the address into consideration.  Update the
10192    address override prefix if necessary.  */
10193 
10194 static enum flag_code
10195 i386_addressing_mode (void)
10196 {
10197   enum flag_code addr_mode;
10198 
10199   if (i.prefix[ADDR_PREFIX])
10200     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
10201   else
10202     {
10203       addr_mode = flag_code;
10204 
10205 #if INFER_ADDR_PREFIX
10206       if (i.mem_operands == 0)
10207 	{
10208 	  /* Infer address prefix from the first memory operand.  */
10209 	  const reg_entry *addr_reg = i.base_reg;
10210 
10211 	  if (addr_reg == NULL)
10212 	    addr_reg = i.index_reg;
10213 
10214 	  if (addr_reg)
10215 	    {
10216 	      if (addr_reg->reg_type.bitfield.dword)
10217 		addr_mode = CODE_32BIT;
10218 	      else if (flag_code != CODE_64BIT
10219 		       && addr_reg->reg_type.bitfield.word)
10220 		addr_mode = CODE_16BIT;
10221 
10222 	      if (addr_mode != flag_code)
10223 		{
10224 		  i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
10225 		  i.prefixes += 1;
10226 		  /* Change the size of any displacement too.  At most one
10227 		     of Disp16 or Disp32 is set.
10228 		     FIXME.  There doesn't seem to be any real need for
10229 		     separate Disp16 and Disp32 flags.  The same goes for
10230 		     Imm16 and Imm32.  Removing them would probably clean
10231 		     up the code quite a lot.  */
10232 		  if (flag_code != CODE_64BIT
10233 		      && (i.types[this_operand].bitfield.disp16
10234 			  || i.types[this_operand].bitfield.disp32))
10235 		    i.types[this_operand]
10236 		      = operand_type_xor (i.types[this_operand], disp16_32);
10237 		}
10238 	    }
10239 	}
10240 #endif
10241     }
10242 
10243   return addr_mode;
10244 }
10245 
/* Make sure the memory operand we've been dealt is valid.
   OPERAND_STRING is the operand text, used only in diagnostics; the
   registers checked are i.base_reg and i.index_reg.  String insns get
   a dedicated check against the single register they address through;
   everything else is checked against the rules of the active
   addressing mode.
   Return 1 on success, 0 on a failure.  */

static int
i386_index_check (const char *operand_string)
{
  const char *kind = "base/index";
  /* Note that this call may add an address size prefix.  */
  enum flag_code addr_mode = i386_addressing_mode ();

  if (current_templates->start->opcode_modifier.isstring
      && !current_templates->start->cpu_flags.bitfield.cpupadlock
      && (current_templates->end[-1].opcode_modifier.isstring
	  || i.mem_operands))
    {
      /* Memory operands of string insns are special in that they only allow
	 a single register (rDI, rSI, or rBX) as their memory address.  */
      const reg_entry *expected_reg;
      /* Both tables are indexed by addr_mode; the row order presumably
	 follows the values of enum flag_code -- confirm against its
	 definition.  */
      static const char *di_si[][2] =
	{
	  { "esi", "edi" },
	  { "si", "di" },
	  { "rsi", "rdi" }
	};
      static const char *bx[] = { "ebx", "bx", "rbx" };

      kind = "string address";

      if (current_templates->start->opcode_modifier.repprefixok)
	{
	  /* ES_OP is the operand number the last template's isstring
	     value designates, relative to IS_STRING_ES_OP0; OP is the
	     template operand this memory operand is taken to be.  */
	  int es_op = current_templates->end[-1].opcode_modifier.isstring
		      - IS_STRING_ES_OP0;
	  int op = 0;

	  if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
	      || ((!i.mem_operands != !intel_syntax)
		  && current_templates->end[-1].operand_types[1]
		     .bitfield.baseindex))
	    op = 1;
	  /* The second column (rDI) is chosen when OP is the ES operand,
	     the first (rSI) otherwise.  */
	  expected_reg = hash_find (reg_hash, di_si[addr_mode][op == es_op]);
	}
      else
	expected_reg = hash_find (reg_hash, bx[addr_mode]);

      if (i.base_reg != expected_reg
	  || i.index_reg
	  || operand_type_check (i.types[this_operand], disp))
	{
	  /* The second memory operand must have the same size as
	     the first one.  */
	  if (i.mem_operands
	      && i.base_reg
	      && !((addr_mode == CODE_64BIT
		    && i.base_reg->reg_type.bitfield.qword)
		   || (addr_mode == CODE_32BIT
		       ? i.base_reg->reg_type.bitfield.dword
		       : i.base_reg->reg_type.bitfield.word)))
	    goto bad_address;

	  /* Any other unexpected address form only draws a warning and
	     the operand is accepted.  */
	  as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
		   operand_string,
		   intel_syntax ? '[' : '(',
		   register_prefix,
		   expected_reg->reg_name,
		   intel_syntax ? ']' : ')');
	  return 1;
	}
      else
	return 1;

      /* Shared failure exit; also reached from the non-string checks
	 below, where KIND still reads "base/index".  */
bad_address:
      as_bad (_("`%s' is not a valid %s expression"),
	      operand_string, kind);
      return 0;
    }
  else
    {
      if (addr_mode != CODE_16BIT)
	{
	  /* 32-bit/64-bit checks.  The base must have the width of the
	     addressing mode, must not be RegIZ, and must not be RegIP
	     when there is an index.  A non-vector index must have the
	     width of the addressing mode and be usable as an index.  */
	  if ((i.base_reg
	       && ((addr_mode == CODE_64BIT
		    ? !i.base_reg->reg_type.bitfield.qword
		    : !i.base_reg->reg_type.bitfield.dword)
		   || (i.index_reg && i.base_reg->reg_num == RegIP)
		   || i.base_reg->reg_num == RegIZ))
	      || (i.index_reg
		  && !i.index_reg->reg_type.bitfield.xmmword
		  && !i.index_reg->reg_type.bitfield.ymmword
		  && !i.index_reg->reg_type.bitfield.zmmword
		  && ((addr_mode == CODE_64BIT
		       ? !i.index_reg->reg_type.bitfield.qword
		       : !i.index_reg->reg_type.bitfield.dword)
		      || !i.index_reg->reg_type.bitfield.baseindex)))
	    goto bad_address;

	  /* bndmk, bndldx, and bndstx have special restrictions. */
	  if (current_templates->start->base_opcode == 0xf30f1b
	      || (current_templates->start->base_opcode & ~1) == 0x0f1a)
	    {
	      /* They cannot use RIP-relative addressing. */
	      if (i.base_reg && i.base_reg->reg_num == RegIP)
		{
		  as_bad (_("`%s' cannot be used here"), operand_string);
		  return 0;
		}

	      /* bndldx and bndstx ignore their scale factor. */
	      if (current_templates->start->base_opcode != 0xf30f1b
		  && i.log2_scale_factor)
		as_warn (_("register scaling is being ignored here"));
	    }
	}
      else
	{
	  /* 16-bit checks.  Base and index must be word registers usable
	     for addressing.  An index additionally requires a base with
	     register number below 6, an index with register number 6 or
	     above, and no scaling (presumably base %bx/%bp with index
	     %si/%di -- confirm against the register table).  */
	  if ((i.base_reg
	       && (!i.base_reg->reg_type.bitfield.word
		   || !i.base_reg->reg_type.bitfield.baseindex))
	      || (i.index_reg
		  && (!i.index_reg->reg_type.bitfield.word
		      || !i.index_reg->reg_type.bitfield.baseindex
		      || !(i.base_reg
			   && i.base_reg->reg_num < 6
			   && i.index_reg->reg_num >= 6
			   && i.log2_scale_factor == 0))))
	    goto bad_address;
	}
    }
  return 1;
}
10376 
10377 /* Handle vector immediates.  */
10378 
10379 static int
10380 RC_SAE_immediate (const char *imm_start)
10381 {
10382   unsigned int match_found, j;
10383   const char *pstr = imm_start;
10384   expressionS *exp;
10385 
10386   if (*pstr != '{')
10387     return 0;
10388 
10389   pstr++;
10390   match_found = 0;
10391   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10392     {
10393       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10394 	{
10395 	  if (!i.rounding)
10396 	    {
10397 	      rc_op.type = RC_NamesTable[j].type;
10398 	      rc_op.operand = this_operand;
10399 	      i.rounding = &rc_op;
10400 	    }
10401 	  else
10402 	    {
10403 	      as_bad (_("duplicated `%s'"), imm_start);
10404 	      return 0;
10405 	    }
10406 	  pstr += RC_NamesTable[j].len;
10407 	  match_found = 1;
10408 	  break;
10409 	}
10410     }
10411   if (!match_found)
10412     return 0;
10413 
10414   if (*pstr++ != '}')
10415     {
10416       as_bad (_("Missing '}': '%s'"), imm_start);
10417       return 0;
10418     }
10419   /* RC/SAE immediate string should contain nothing more.  */;
10420   if (*pstr != 0)
10421     {
10422       as_bad (_("Junk after '}': '%s'"), imm_start);
10423       return 0;
10424     }
10425 
10426   exp = &im_expressions[i.imm_operands++];
10427   i.op[this_operand].imms = exp;
10428 
10429   exp->X_op = O_constant;
10430   exp->X_add_number = 0;
10431   exp->X_add_symbol = (symbolS *) 0;
10432   exp->X_op_symbol = (symbolS *) 0;
10433 
10434   i.types[this_operand].bitfield.imm8 = 1;
10435   return 1;
10436 }
10437 
10438 /* Only string instructions can have a second memory operand, so
10439    reduce current_templates to just those if it contains any.  */
10440 static int
10441 maybe_adjust_templates (void)
10442 {
10443   const insn_template *t;
10444 
10445   gas_assert (i.mem_operands == 1);
10446 
10447   for (t = current_templates->start; t < current_templates->end; ++t)
10448     if (t->opcode_modifier.isstring)
10449       break;
10450 
10451   if (t < current_templates->end)
10452     {
10453       static templates aux_templates;
10454       bfd_boolean recheck;
10455 
10456       aux_templates.start = t;
10457       for (; t < current_templates->end; ++t)
10458 	if (!t->opcode_modifier.isstring)
10459 	  break;
10460       aux_templates.end = t;
10461 
10462       /* Determine whether to re-check the first memory operand.  */
10463       recheck = (aux_templates.start != current_templates->start
10464 		 || t != current_templates->end);
10465 
10466       current_templates = &aux_templates;
10467 
10468       if (recheck)
10469 	{
10470 	  i.mem_operands = 0;
10471 	  if (i.memop1_string != NULL
10472 	      && i386_index_check (i.memop1_string) == 0)
10473 	    return 0;
10474 	  i.mem_operands = 1;
10475 	}
10476     }
10477 
10478   return 1;
10479 }
10480 
/* Parse OPERAND_STRING into the i386_insn structure I, as operand
   THIS_OPERAND.  After an optional absolute prefix the operand is
   tried, in this order, as a register (a segment register followed by
   ':' introduces a memory reference instead), an immediate, an RC/SAE
   pseudo immediate, and a memory reference.  Returns zero
   on error.  */

static int
i386_att_operand (char *operand_string)
{
  const reg_entry *r;
  char *end_op;
  char *op_string = operand_string;

  if (is_space_char (*op_string))
    ++op_string;

  /* We check for an absolute prefix (differentiating,
     for example, 'jmp pc_relative_label' from 'jmp *absolute_label').  */
  if (*op_string == ABSOLUTE_PREFIX)
    {
      ++op_string;
      if (is_space_char (*op_string))
	++op_string;
      i.jumpabsolute = TRUE;
    }

  /* Check if operand is a register.  */
  if ((r = parse_register (op_string, &end_op)) != NULL)
    {
      i386_operand_type temp;

      /* Check for a segment override by searching for ':' after a
	 segment register.  */
      op_string = end_op;
      if (is_space_char (*op_string))
	++op_string;
      if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
	{
	  /* Record the override for the memory operand about to be
	     parsed; i.mem_operands has not been incremented for it
	     yet.  */
	  switch (r->reg_num)
	    {
	    case 0:
	      i.seg[i.mem_operands] = &es;
	      break;
	    case 1:
	      i.seg[i.mem_operands] = &cs;
	      break;
	    case 2:
	      i.seg[i.mem_operands] = &ss;
	      break;
	    case 3:
	      i.seg[i.mem_operands] = &ds;
	      break;
	    case 4:
	      i.seg[i.mem_operands] = &fs;
	      break;
	    case 5:
	      i.seg[i.mem_operands] = &gs;
	      break;
	    }

	  /* Skip the ':' and whitespace.  */
	  ++op_string;
	  if (is_space_char (*op_string))
	    ++op_string;

	  if (!is_digit_char (*op_string)
	      && !is_identifier_char (*op_string)
	      && *op_string != '('
	      && *op_string != ABSOLUTE_PREFIX)
	    {
	      as_bad (_("bad memory operand `%s'"), op_string);
	      return 0;
	    }
	  /* Handle case of %es:*foo.  */
	  if (*op_string == ABSOLUTE_PREFIX)
	    {
	      ++op_string;
	      if (is_space_char (*op_string))
		++op_string;
	      i.jumpabsolute = TRUE;
	    }
	  /* The rest of the operand is parsed by the memory reference
	     branch further down.  */
	  goto do_memory_reference;
	}

      /* Handle vector operations.  */
      if (*op_string == '{')
	{
	  op_string = check_VecOperations (op_string, NULL);
	  if (op_string == NULL)
	    return 0;
	}

      if (*op_string)
	{
	  as_bad (_("junk `%s' after register"), op_string);
	  return 0;
	}
      /* Record the register's type, minus its baseindex bit, in the
	 operand's type.  */
      temp = r->reg_type;
      temp.bitfield.baseindex = 0;
      i.types[this_operand] = operand_type_or (i.types[this_operand],
					       temp);
      i.types[this_operand].bitfield.unspecified = 0;
      i.op[this_operand].regs = r;
      i.reg_operands++;
    }
  else if (*op_string == REGISTER_PREFIX)
    {
      as_bad (_("bad register name `%s'"), op_string);
      return 0;
    }
  else if (*op_string == IMMEDIATE_PREFIX)
    {
      ++op_string;
      if (i.jumpabsolute)
	{
	  as_bad (_("immediate operand illegal with absolute jump"));
	  return 0;
	}
      if (!i386_immediate (op_string))
	return 0;
    }
  else if (RC_SAE_immediate (operand_string))
    {
      /* If it is a RC or SAE immediate, do nothing.  */
      ;
    }
  else if (is_digit_char (*op_string)
	   || is_identifier_char (*op_string)
	   || *op_string == '"'
	   || *op_string == '(')
    {
      /* This is a memory reference of some sort.  */
      char *base_string;

      /* Start and end of displacement string expression (if found).  */
      char *displacement_string_start;
      char *displacement_string_end;
      char *vop_start;

    do_memory_reference:
      /* A second memory operand is acceptable only for string insns,
	 and a third never is.  */
      if (i.mem_operands == 1 && !maybe_adjust_templates ())
	return 0;
      if ((i.mem_operands == 1
	   && !current_templates->start->opcode_modifier.isstring)
	  || i.mem_operands == 2)
	{
	  as_bad (_("too many memory references for `%s'"),
		  current_templates->start->name);
	  return 0;
	}

      /* Check for base index form.  We detect the base index form by
	 looking for an ')' at the end of the operand, searching
	 for the '(' matching it, and finding a REGISTER_PREFIX or ','
	 after the '('.  */
      base_string = op_string + strlen (op_string);

      /* Handle vector operations.  */
      vop_start = strchr (op_string, '{');
      if (vop_start && vop_start < base_string)
	{
	  if (check_VecOperations (vop_start, base_string) == NULL)
	    return 0;
	  /* From here on the operand is taken to end where the vector
	     operations begin.  */
	  base_string = vop_start;
	}

      /* Step back onto the last character of the operand proper,
	 ignoring one trailing blank.  */
      --base_string;
      if (is_space_char (*base_string))
	--base_string;

      /* If we only have a displacement, set-up for it to be parsed later.  */
      displacement_string_start = op_string;
      displacement_string_end = base_string + 1;

      if (*base_string == ')')
	{
	  char *temp_string;
	  unsigned int parens_balanced = 1;
	  /* We've already checked that the number of left & right ()'s are
	     equal, so this loop will not be infinite.  */
	  do
	    {
	      base_string--;
	      if (*base_string == ')')
		parens_balanced++;
	      if (*base_string == '(')
		parens_balanced--;
	    }
	  while (parens_balanced);

	  /* TEMP_STRING remembers the matching '('; the displacement,
	     if any, ends there.  */
	  temp_string = base_string;

	  /* Skip past '(' and whitespace.  */
	  ++base_string;
	  if (is_space_char (*base_string))
	    ++base_string;

	  if (*base_string == ','
	      || ((i.base_reg = parse_register (base_string, &end_op))
		  != NULL))
	    {
	      displacement_string_end = temp_string;

	      i.types[this_operand].bitfield.baseindex = 1;

	      if (i.base_reg)
		{
		  base_string = end_op;
		  if (is_space_char (*base_string))
		    ++base_string;
		}

	      /* There may be an index reg or scale factor here.  */
	      if (*base_string == ',')
		{
		  ++base_string;
		  if (is_space_char (*base_string))
		    ++base_string;

		  if ((i.index_reg = parse_register (base_string, &end_op))
		      != NULL)
		    {
		      base_string = end_op;
		      if (is_space_char (*base_string))
			++base_string;
		      if (*base_string == ',')
			{
			  ++base_string;
			  if (is_space_char (*base_string))
			    ++base_string;
			}
		      else if (*base_string != ')')
			{
			  as_bad (_("expecting `,' or `)' "
				    "after index register in `%s'"),
				  operand_string);
			  return 0;
			}
		    }
		  else if (*base_string == REGISTER_PREFIX)
		    {
		      /* Cut the text at the next ',' so that only the
			 bad name is quoted.  */
		      end_op = strchr (base_string, ',');
		      if (end_op)
			*end_op = '\0';
		      as_bad (_("bad register name `%s'"), base_string);
		      return 0;
		    }

		  /* Check for scale factor.  */
		  if (*base_string != ')')
		    {
		      char *end_scale = i386_scale (base_string);

		      if (!end_scale)
			return 0;

		      base_string = end_scale;
		      if (is_space_char (*base_string))
			++base_string;
		      if (*base_string != ')')
			{
			  as_bad (_("expecting `)' "
				    "after scale factor in `%s'"),
				  operand_string);
			  return 0;
			}
		    }
		  else if (!i.index_reg)
		    {
		      as_bad (_("expecting index register or scale factor "
				"after `,'; got '%c'"),
			      *base_string);
		      return 0;
		    }
		}
	      else if (*base_string != ')')
		{
		  as_bad (_("expecting `,' or `)' "
			    "after base register in `%s'"),
			  operand_string);
		  return 0;
		}
	    }
	  else if (*base_string == REGISTER_PREFIX)
	    {
	      /* Cut the text at the next ',' so that only the bad name
		 is quoted.  */
	      end_op = strchr (base_string, ',');
	      if (end_op)
		*end_op = '\0';
	      as_bad (_("bad register name `%s'"), base_string);
	      return 0;
	    }
	}

      /* If there's an expression beginning the operand, parse it,
	 assuming displacement_string_start and
	 displacement_string_end are meaningful.  */
      if (displacement_string_start != displacement_string_end)
	{
	  if (!i386_displacement (displacement_string_start,
				  displacement_string_end))
	    return 0;
	}

      /* Special case for (%dx) while doing input/output op.  The
	 operand is given the register's type and is not counted as a
	 memory operand.  */
      if (i.base_reg
	  && i.base_reg->reg_type.bitfield.instance == RegD
	  && i.base_reg->reg_type.bitfield.word
	  && i.index_reg == 0
	  && i.log2_scale_factor == 0
	  && i.seg[i.mem_operands] == 0
	  && !operand_type_check (i.types[this_operand], disp))
	{
	  i.types[this_operand] = i.base_reg->reg_type;
	  return 1;
	}

      if (i386_index_check (operand_string) == 0)
	return 0;
      i.flags[this_operand] |= Operand_Mem;
      /* Keep a copy of the first memory operand's text;
	 maybe_adjust_templates uses it to re-check that operand.  */
      if (i.mem_operands == 0)
	i.memop1_string = xstrdup (operand_string);
      i.mem_operands++;
    }
  else
    {
      /* It's not a memory operand; argh!  */
      as_bad (_("invalid char %s beginning operand %d `%s'"),
	      output_invalid (*op_string),
	      this_operand + 1,
	      op_string);
      return 0;
    }
  return 1;			/* Normal return.  */
}
10812 
10813 /* Calculate the maximum variable size (i.e., excluding fr_fix)
10814    that an rs_machine_dependent frag may reach.  */
10815 
10816 unsigned int
10817 i386_frag_max_var (fragS *frag)
10818 {
10819   /* The only relaxable frags are for jumps.
10820      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
10821   gas_assert (frag->fr_type == rs_machine_dependent);
10822   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
10823 }
10824 
10825 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10826 static int
10827 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
10828 {
10829   /* STT_GNU_IFUNC symbol must go through PLT.  */
10830   if ((symbol_get_bfdsym (fr_symbol)->flags
10831        & BSF_GNU_INDIRECT_FUNCTION) != 0)
10832     return 0;
10833 
10834   if (!S_IS_EXTERNAL (fr_symbol))
10835     /* Symbol may be weak or local.  */
10836     return !S_IS_WEAK (fr_symbol);
10837 
10838   /* Global symbols with non-default visibility can't be preempted. */
10839   if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
10840     return 1;
10841 
10842   if (fr_var != NO_RELOC)
10843     switch ((enum bfd_reloc_code_real) fr_var)
10844       {
10845       case BFD_RELOC_386_PLT32:
10846       case BFD_RELOC_X86_64_PLT32:
10847 	/* Symbol with PLT relocation may be preempted. */
10848 	return 0;
10849       default:
10850 	abort ();
10851       }
10852 
10853   /* Global symbols with default visibility in a shared library may be
10854      preempted by another definition.  */
10855   return !shared;
10856 }
10857 #endif
10858 
10859 /* Return the next non-empty frag.  */
10860 
10861 static fragS *
10862 i386_next_non_empty_frag (fragS *fragP)
10863 {
10864   /* There may be a frag with a ".fill 0" when there is no room in
10865      the current frag for frag_grow in output_insn.  */
10866   for (fragP = fragP->fr_next;
10867        (fragP != NULL
10868 	&& fragP->fr_type == rs_fill
10869 	&& fragP->fr_fix == 0);
10870        fragP = fragP->fr_next)
10871     ;
10872   return fragP;
10873 }
10874 
10875 /* Return the next jcc frag after BRANCH_PADDING.  */
10876 
10877 static fragS *
10878 i386_next_jcc_frag (fragS *fragP)
10879 {
10880   if (!fragP)
10881     return NULL;
10882 
10883   if (fragP->fr_type == rs_machine_dependent
10884       && (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
10885 	  == BRANCH_PADDING))
10886     {
10887       fragP = i386_next_non_empty_frag (fragP);
10888       if (fragP->fr_type != rs_machine_dependent)
10889 	return NULL;
10890       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == COND_JUMP)
10891 	return fragP;
10892     }
10893 
10894   return NULL;
10895 }
10896 
10897 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
10898 
10899 static void
10900 i386_classify_machine_dependent_frag (fragS *fragP)
10901 {
10902   fragS *cmp_fragP;
10903   fragS *pad_fragP;
10904   fragS *branch_fragP;
10905   fragS *next_fragP;
10906   unsigned int max_prefix_length;
10907 
10908   if (fragP->tc_frag_data.classified)
10909     return;
10910 
10911   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
10912      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
10913   for (next_fragP = fragP;
10914        next_fragP != NULL;
10915        next_fragP = next_fragP->fr_next)
10916     {
10917       next_fragP->tc_frag_data.classified = 1;
10918       if (next_fragP->fr_type == rs_machine_dependent)
10919 	switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
10920 	  {
10921 	  case BRANCH_PADDING:
10922 	    /* The BRANCH_PADDING frag must be followed by a branch
10923 	       frag.  */
10924 	    branch_fragP = i386_next_non_empty_frag (next_fragP);
10925 	    next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
10926 	    break;
10927 	  case FUSED_JCC_PADDING:
10928 	    /* Check if this is a fused jcc:
10929 	       FUSED_JCC_PADDING
10930 	       CMP like instruction
10931 	       BRANCH_PADDING
10932 	       COND_JUMP
10933 	       */
10934 	    cmp_fragP = i386_next_non_empty_frag (next_fragP);
10935 	    pad_fragP = i386_next_non_empty_frag (cmp_fragP);
10936 	    branch_fragP = i386_next_jcc_frag (pad_fragP);
10937 	    if (branch_fragP)
10938 	      {
10939 		/* The BRANCH_PADDING frag is merged with the
10940 		   FUSED_JCC_PADDING frag.  */
10941 		next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
10942 		/* CMP like instruction size.  */
10943 		next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
10944 		frag_wane (pad_fragP);
10945 		/* Skip to branch_fragP.  */
10946 		next_fragP = branch_fragP;
10947 	      }
10948 	    else if (next_fragP->tc_frag_data.max_prefix_length)
10949 	      {
10950 		/* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
10951 		   a fused jcc.  */
10952 		next_fragP->fr_subtype
10953 		  = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
10954 		next_fragP->tc_frag_data.max_bytes
10955 		  = next_fragP->tc_frag_data.max_prefix_length;
10956 		/* This will be updated in the BRANCH_PREFIX scan.  */
10957 		next_fragP->tc_frag_data.max_prefix_length = 0;
10958 	      }
10959 	    else
10960 	      frag_wane (next_fragP);
10961 	    break;
10962 	  }
10963     }
10964 
10965   /* Stop if there is no BRANCH_PREFIX.  */
10966   if (!align_branch_prefix_size)
10967     return;
10968 
10969   /* Scan for BRANCH_PREFIX.  */
10970   for (; fragP != NULL; fragP = fragP->fr_next)
10971     {
10972       if (fragP->fr_type != rs_machine_dependent
10973 	  || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
10974 	      != BRANCH_PREFIX))
10975 	continue;
10976 
10977       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
10978 	 COND_JUMP_PREFIX.  */
10979       max_prefix_length = 0;
10980       for (next_fragP = fragP;
10981 	   next_fragP != NULL;
10982 	   next_fragP = next_fragP->fr_next)
10983 	{
10984 	  if (next_fragP->fr_type == rs_fill)
10985 	    /* Skip rs_fill frags.  */
10986 	    continue;
10987 	  else if (next_fragP->fr_type != rs_machine_dependent)
10988 	    /* Stop for all other frags.  */
10989 	    break;
10990 
10991 	  /* rs_machine_dependent frags.  */
10992 	  if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
10993 	      == BRANCH_PREFIX)
10994 	    {
10995 	      /* Count BRANCH_PREFIX frags.  */
10996 	      if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
10997 		{
10998 		  max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
10999 		  frag_wane (next_fragP);
11000 		}
11001 	      else
11002 		max_prefix_length
11003 		  += next_fragP->tc_frag_data.max_bytes;
11004 	    }
11005 	  else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11006 		    == BRANCH_PADDING)
11007 		   || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11008 		       == FUSED_JCC_PADDING))
11009 	    {
11010 	      /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
11011 	      fragP->tc_frag_data.u.padding_fragP = next_fragP;
11012 	      break;
11013 	    }
11014 	  else
11015 	    /* Stop for other rs_machine_dependent frags.  */
11016 	    break;
11017 	}
11018 
11019       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
11020 
11021       /* Skip to the next frag.  */
11022       fragP = next_fragP;
11023     }
11024 }
11025 
11026 /* Compute padding size for
11027 
11028 	FUSED_JCC_PADDING
11029 	CMP like instruction
11030 	BRANCH_PADDING
11031 	COND_JUMP/UNCOND_JUMP
11032 
11033    or
11034 
11035 	BRANCH_PADDING
11036 	COND_JUMP/UNCOND_JUMP
11037  */
11038 
11039 static int
11040 i386_branch_padding_size (fragS *fragP, offsetT address)
11041 {
11042   unsigned int offset, size, padding_size;
11043   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
11044 
11045   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
11046   if (!address)
11047     address = fragP->fr_address;
11048   address += fragP->fr_fix;
11049 
11050   /* CMP like instrunction size.  */
11051   size = fragP->tc_frag_data.cmp_size;
11052 
11053   /* The base size of the branch frag.  */
11054   size += branch_fragP->fr_fix;
11055 
11056   /* Add opcode and displacement bytes for the rs_machine_dependent
11057      branch frag.  */
11058   if (branch_fragP->fr_type == rs_machine_dependent)
11059     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
11060 
11061   /* Check if branch is within boundary and doesn't end at the last
11062      byte.  */
11063   offset = address & ((1U << align_branch_power) - 1);
11064   if ((offset + size) >= (1U << align_branch_power))
11065     /* Padding needed to avoid crossing boundary.  */
11066     padding_size = (1U << align_branch_power) - offset;
11067   else
11068     /* No padding needed.  */
11069     padding_size = 0;
11070 
11071   /* The return value may be saved in tc_frag_data.length which is
11072      unsigned byte.  */
11073   if (!fits_in_unsigned_byte (padding_size))
11074     abort ();
11075 
11076   return padding_size;
11077 }
11078 
11079 /* i386_generic_table_relax_frag()
11080 
11081    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
11082    grow/shrink padding to align branch frags.  Hand others to
11083    relax_frag().  */
11084 
11085 long
11086 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
11087 {
11088   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11089       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11090     {
11091       long padding_size = i386_branch_padding_size (fragP, 0);
11092       long grow = padding_size - fragP->tc_frag_data.length;
11093 
11094       /* When the BRANCH_PREFIX frag is used, the computed address
11095          must match the actual address and there should be no padding.  */
11096       if (fragP->tc_frag_data.padding_address
11097 	  && (fragP->tc_frag_data.padding_address != fragP->fr_address
11098 	      || padding_size))
11099 	abort ();
11100 
11101       /* Update the padding size.  */
11102       if (grow)
11103 	fragP->tc_frag_data.length = padding_size;
11104 
11105       return grow;
11106     }
11107   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11108     {
11109       fragS *padding_fragP, *next_fragP;
11110       long padding_size, left_size, last_size;
11111 
11112       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11113       if (!padding_fragP)
11114 	/* Use the padding set by the leading BRANCH_PREFIX frag.  */
11115 	return (fragP->tc_frag_data.length
11116 		- fragP->tc_frag_data.last_length);
11117 
11118       /* Compute the relative address of the padding frag in the very
11119         first time where the BRANCH_PREFIX frag sizes are zero.  */
11120       if (!fragP->tc_frag_data.padding_address)
11121 	fragP->tc_frag_data.padding_address
11122 	  = padding_fragP->fr_address - (fragP->fr_address - stretch);
11123 
11124       /* First update the last length from the previous interation.  */
11125       left_size = fragP->tc_frag_data.prefix_length;
11126       for (next_fragP = fragP;
11127 	   next_fragP != padding_fragP;
11128 	   next_fragP = next_fragP->fr_next)
11129 	if (next_fragP->fr_type == rs_machine_dependent
11130 	    && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11131 		== BRANCH_PREFIX))
11132 	  {
11133 	    if (left_size)
11134 	      {
11135 		int max = next_fragP->tc_frag_data.max_bytes;
11136 		if (max)
11137 		  {
11138 		    int size;
11139 		    if (max > left_size)
11140 		      size = left_size;
11141 		    else
11142 		      size = max;
11143 		    left_size -= size;
11144 		    next_fragP->tc_frag_data.last_length = size;
11145 		  }
11146 	      }
11147 	    else
11148 	      next_fragP->tc_frag_data.last_length = 0;
11149 	  }
11150 
11151       /* Check the padding size for the padding frag.  */
11152       padding_size = i386_branch_padding_size
11153 	(padding_fragP, (fragP->fr_address
11154 			 + fragP->tc_frag_data.padding_address));
11155 
11156       last_size = fragP->tc_frag_data.prefix_length;
11157       /* Check if there is change from the last interation.  */
11158       if (padding_size == last_size)
11159 	{
11160 	  /* Update the expected address of the padding frag.  */
11161 	  padding_fragP->tc_frag_data.padding_address
11162 	    = (fragP->fr_address + padding_size
11163 	       + fragP->tc_frag_data.padding_address);
11164 	  return 0;
11165 	}
11166 
11167       if (padding_size > fragP->tc_frag_data.max_prefix_length)
11168 	{
11169 	  /* No padding if there is no sufficient room.  Clear the
11170 	     expected address of the padding frag.  */
11171 	  padding_fragP->tc_frag_data.padding_address = 0;
11172 	  padding_size = 0;
11173 	}
11174       else
11175 	/* Store the expected address of the padding frag.  */
11176 	padding_fragP->tc_frag_data.padding_address
11177 	  = (fragP->fr_address + padding_size
11178 	     + fragP->tc_frag_data.padding_address);
11179 
11180       fragP->tc_frag_data.prefix_length = padding_size;
11181 
11182       /* Update the length for the current interation.  */
11183       left_size = padding_size;
11184       for (next_fragP = fragP;
11185 	   next_fragP != padding_fragP;
11186 	   next_fragP = next_fragP->fr_next)
11187 	if (next_fragP->fr_type == rs_machine_dependent
11188 	    && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
11189 		== BRANCH_PREFIX))
11190 	  {
11191 	    if (left_size)
11192 	      {
11193 		int max = next_fragP->tc_frag_data.max_bytes;
11194 		if (max)
11195 		  {
11196 		    int size;
11197 		    if (max > left_size)
11198 		      size = left_size;
11199 		    else
11200 		      size = max;
11201 		    left_size -= size;
11202 		    next_fragP->tc_frag_data.length = size;
11203 		  }
11204 	      }
11205 	    else
11206 	      next_fragP->tc_frag_data.length = 0;
11207 	  }
11208 
11209       return (fragP->tc_frag_data.length
11210 	      - fragP->tc_frag_data.last_length);
11211     }
11212   return relax_frag (segment, fragP, stretch);
11213 }
11214 
11215 /* md_estimate_size_before_relax()
11216 
11217    Called just before relax() for rs_machine_dependent frags.  The x86
11218    assembler uses these frags to handle variable size jump
11219    instructions.
11220 
11221    Any symbol that is now undefined will not become defined.
11222    Return the correct fr_subtype in the frag.
11223    Return the initial "guess for variable size of frag" to caller.
11224    The guess is actually the growth beyond the fixed part.  Whatever
11225    we do to grow the fixed or variable part contributes to our
11226    returned value.  */
11227 
11228 int
11229 md_estimate_size_before_relax (fragS *fragP, segT segment)
11230 {
11231   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11232       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
11233       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
11234     {
11235       i386_classify_machine_dependent_frag (fragP);
11236       return fragP->tc_frag_data.length;
11237     }
11238 
11239   /* We've already got fragP->fr_subtype right;  all we have to do is
11240      check for un-relaxable symbols.  On an ELF system, we can't relax
11241      an externally visible symbol, because it may be overridden by a
11242      shared library.  */
11243   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
11244 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11245       || (IS_ELF
11246 	  && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
11247 						fragP->fr_var))
11248 #endif
11249 #if defined (OBJ_COFF) && defined (TE_PE)
11250       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
11251 	  && S_IS_WEAK (fragP->fr_symbol))
11252 #endif
11253       )
11254     {
11255       /* Symbol is undefined in this segment, or we need to keep a
11256 	 reloc so that weak symbols can be overridden.  */
11257       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
11258       enum bfd_reloc_code_real reloc_type;
11259       unsigned char *opcode;
11260       int old_fr_fix;
11261 
11262       if (fragP->fr_var != NO_RELOC)
11263 	reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
11264       else if (size == 2)
11265 	reloc_type = BFD_RELOC_16_PCREL;
11266 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11267       else if (need_plt32_p (fragP->fr_symbol))
11268 	reloc_type = BFD_RELOC_X86_64_PLT32;
11269 #endif
11270       else
11271 	reloc_type = BFD_RELOC_32_PCREL;
11272 
11273       old_fr_fix = fragP->fr_fix;
11274       opcode = (unsigned char *) fragP->fr_opcode;
11275 
11276       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
11277 	{
11278 	case UNCOND_JUMP:
11279 	  /* Make jmp (0xeb) a (d)word displacement jump.  */
11280 	  opcode[0] = 0xe9;
11281 	  fragP->fr_fix += size;
11282 	  fix_new (fragP, old_fr_fix, size,
11283 		   fragP->fr_symbol,
11284 		   fragP->fr_offset, 1,
11285 		   reloc_type);
11286 	  break;
11287 
11288 	case COND_JUMP86:
11289 	  if (size == 2
11290 	      && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
11291 	    {
11292 	      /* Negate the condition, and branch past an
11293 		 unconditional jump.  */
11294 	      opcode[0] ^= 1;
11295 	      opcode[1] = 3;
11296 	      /* Insert an unconditional jump.  */
11297 	      opcode[2] = 0xe9;
11298 	      /* We added two extra opcode bytes, and have a two byte
11299 		 offset.  */
11300 	      fragP->fr_fix += 2 + 2;
11301 	      fix_new (fragP, old_fr_fix + 2, 2,
11302 		       fragP->fr_symbol,
11303 		       fragP->fr_offset, 1,
11304 		       reloc_type);
11305 	      break;
11306 	    }
11307 	  /* Fall through.  */
11308 
11309 	case COND_JUMP:
11310 	  if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
11311 	    {
11312 	      fixS *fixP;
11313 
11314 	      fragP->fr_fix += 1;
11315 	      fixP = fix_new (fragP, old_fr_fix, 1,
11316 			      fragP->fr_symbol,
11317 			      fragP->fr_offset, 1,
11318 			      BFD_RELOC_8_PCREL);
11319 	      fixP->fx_signed = 1;
11320 	      break;
11321 	    }
11322 
11323 	  /* This changes the byte-displacement jump 0x7N
11324 	     to the (d)word-displacement jump 0x0f,0x8N.  */
11325 	  opcode[1] = opcode[0] + 0x10;
11326 	  opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
11327 	  /* We've added an opcode byte.  */
11328 	  fragP->fr_fix += 1 + size;
11329 	  fix_new (fragP, old_fr_fix + 1, size,
11330 		   fragP->fr_symbol,
11331 		   fragP->fr_offset, 1,
11332 		   reloc_type);
11333 	  break;
11334 
11335 	default:
11336 	  BAD_CASE (fragP->fr_subtype);
11337 	  break;
11338 	}
11339       frag_wane (fragP);
11340       return fragP->fr_fix - old_fr_fix;
11341     }
11342 
11343   /* Guess size depending on current relax state.  Initially the relax
11344      state will correspond to a short jump and we return 1, because
11345      the variable part of the frag (the branch offset) is one byte
11346      long.  However, we can relax a section more than once and in that
11347      case we must either set fr_subtype back to the unrelaxed state,
11348      or return the value for the appropriate branch.  */
11349   return md_relax_table[fragP->fr_subtype].rlx_length;
11350 }
11351 
11352 /* Called after relax() is finished.
11353 
11354    In:	Address of frag.
11355 	fr_type == rs_machine_dependent.
11356 	fr_subtype is what the address relaxed to.
11357 
11358    Out:	Any fixSs and constants are set up.
11359 	Caller will turn frag into a ".space 0".  */
11360 
11361 void
11362 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
11363                  fragS *fragP)
11364 {
11365   unsigned char *opcode;
11366   unsigned char *where_to_put_displacement = NULL;
11367   offsetT target_address;
11368   offsetT opcode_address;
11369   unsigned int extension = 0;
11370   offsetT displacement_from_opcode_start;
11371 
11372   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
11373       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
11374       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11375     {
11376       /* Generate nop padding.  */
11377       unsigned int size = fragP->tc_frag_data.length;
11378       if (size)
11379 	{
11380 	  if (size > fragP->tc_frag_data.max_bytes)
11381 	    abort ();
11382 
11383 	  if (flag_debug)
11384 	    {
11385 	      const char *msg;
11386 	      const char *branch = "branch";
11387 	      const char *prefix = "";
11388 	      fragS *padding_fragP;
11389 	      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
11390 		  == BRANCH_PREFIX)
11391 		{
11392 		  padding_fragP = fragP->tc_frag_data.u.padding_fragP;
11393 		  switch (fragP->tc_frag_data.default_prefix)
11394 		    {
11395 		    default:
11396 		      abort ();
11397 		      break;
11398 		    case CS_PREFIX_OPCODE:
11399 		      prefix = " cs";
11400 		      break;
11401 		    case DS_PREFIX_OPCODE:
11402 		      prefix = " ds";
11403 		      break;
11404 		    case ES_PREFIX_OPCODE:
11405 		      prefix = " es";
11406 		      break;
11407 		    case FS_PREFIX_OPCODE:
11408 		      prefix = " fs";
11409 		      break;
11410 		    case GS_PREFIX_OPCODE:
11411 		      prefix = " gs";
11412 		      break;
11413 		    case SS_PREFIX_OPCODE:
11414 		      prefix = " ss";
11415 		      break;
11416 		    }
11417 		  if (padding_fragP)
11418 		    msg = _("%s:%u: add %d%s at 0x%llx to align "
11419 			    "%s within %d-byte boundary\n");
11420 		  else
11421 		    msg = _("%s:%u: add additional %d%s at 0x%llx to "
11422 			    "align %s within %d-byte boundary\n");
11423 		}
11424 	      else
11425 		{
11426 		  padding_fragP = fragP;
11427 		  msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
11428 			  "%s within %d-byte boundary\n");
11429 		}
11430 
11431 	      if (padding_fragP)
11432 		switch (padding_fragP->tc_frag_data.branch_type)
11433 		  {
11434 		  case align_branch_jcc:
11435 		    branch = "jcc";
11436 		    break;
11437 		  case align_branch_fused:
11438 		    branch = "fused jcc";
11439 		    break;
11440 		  case align_branch_jmp:
11441 		    branch = "jmp";
11442 		    break;
11443 		  case align_branch_call:
11444 		    branch = "call";
11445 		    break;
11446 		  case align_branch_indirect:
11447 		    branch = "indiret branch";
11448 		    break;
11449 		  case align_branch_ret:
11450 		    branch = "ret";
11451 		    break;
11452 		  default:
11453 		    break;
11454 		  }
11455 
11456 	      fprintf (stdout, msg,
11457 		       fragP->fr_file, fragP->fr_line, size, prefix,
11458 		       (long long) fragP->fr_address, branch,
11459 		       1 << align_branch_power);
11460 	    }
11461 	  if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
11462 	    memset (fragP->fr_opcode,
11463 		    fragP->tc_frag_data.default_prefix, size);
11464 	  else
11465 	    i386_generate_nops (fragP, (char *) fragP->fr_opcode,
11466 				size, 0);
11467 	  fragP->fr_fix += size;
11468 	}
11469       return;
11470     }
11471 
11472   opcode = (unsigned char *) fragP->fr_opcode;
11473 
11474   /* Address we want to reach in file space.  */
11475   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
11476 
11477   /* Address opcode resides at in file space.  */
11478   opcode_address = fragP->fr_address + fragP->fr_fix;
11479 
11480   /* Displacement from opcode start to fill into instruction.  */
11481   displacement_from_opcode_start = target_address - opcode_address;
11482 
11483   if ((fragP->fr_subtype & BIG) == 0)
11484     {
11485       /* Don't have to change opcode.  */
11486       extension = 1;		/* 1 opcode + 1 displacement  */
11487       where_to_put_displacement = &opcode[1];
11488     }
11489   else
11490     {
11491       if (no_cond_jump_promotion
11492 	  && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
11493 	as_warn_where (fragP->fr_file, fragP->fr_line,
11494 		       _("long jump required"));
11495 
11496       switch (fragP->fr_subtype)
11497 	{
11498 	case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
11499 	  extension = 4;		/* 1 opcode + 4 displacement  */
11500 	  opcode[0] = 0xe9;
11501 	  where_to_put_displacement = &opcode[1];
11502 	  break;
11503 
11504 	case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
11505 	  extension = 2;		/* 1 opcode + 2 displacement  */
11506 	  opcode[0] = 0xe9;
11507 	  where_to_put_displacement = &opcode[1];
11508 	  break;
11509 
11510 	case ENCODE_RELAX_STATE (COND_JUMP, BIG):
11511 	case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
11512 	  extension = 5;		/* 2 opcode + 4 displacement  */
11513 	  opcode[1] = opcode[0] + 0x10;
11514 	  opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
11515 	  where_to_put_displacement = &opcode[2];
11516 	  break;
11517 
11518 	case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
11519 	  extension = 3;		/* 2 opcode + 2 displacement  */
11520 	  opcode[1] = opcode[0] + 0x10;
11521 	  opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
11522 	  where_to_put_displacement = &opcode[2];
11523 	  break;
11524 
11525 	case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
11526 	  extension = 4;
11527 	  opcode[0] ^= 1;
11528 	  opcode[1] = 3;
11529 	  opcode[2] = 0xe9;
11530 	  where_to_put_displacement = &opcode[3];
11531 	  break;
11532 
11533 	default:
11534 	  BAD_CASE (fragP->fr_subtype);
11535 	  break;
11536 	}
11537     }
11538 
11539   /* If size if less then four we are sure that the operand fits,
11540      but if it's 4, then it could be that the displacement is larger
11541      then -/+ 2GB.  */
11542   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
11543       && object_64bit
11544       && ((addressT) (displacement_from_opcode_start - extension
11545 		      + ((addressT) 1 << 31))
11546 	  > (((addressT) 2 << 31) - 1)))
11547     {
11548       as_bad_where (fragP->fr_file, fragP->fr_line,
11549 		    _("jump target out of range"));
11550       /* Make us emit 0.  */
11551       displacement_from_opcode_start = extension;
11552     }
11553   /* Now put displacement after opcode.  */
11554   md_number_to_chars ((char *) where_to_put_displacement,
11555 		      (valueT) (displacement_from_opcode_start - extension),
11556 		      DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
11557   fragP->fr_fix += extension;
11558 }
11559 
11560 /* Apply a fixup (fixP) to segment data, once it has been determined
11561    by our caller that we have all the info we need to fix it up.
11562 
11563    Parameter valP is the pointer to the value of the bits.
11564 
11565    On the 386, immediates, displacements, and data pointers are all in
11566    the same (little-endian) format, so we don't need to care about which
11567    we are handling.  */
11568 
11569 void
11570 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
11571 {
11572   char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
11573   valueT value = *valP;
11574 
11575 #if !defined (TE_Mach)
11576   if (fixP->fx_pcrel)
11577     {
11578       switch (fixP->fx_r_type)
11579 	{
11580 	default:
11581 	  break;
11582 
11583 	case BFD_RELOC_64:
11584 	  fixP->fx_r_type = BFD_RELOC_64_PCREL;
11585 	  break;
11586 	case BFD_RELOC_32:
11587 	case BFD_RELOC_X86_64_32S:
11588 	  fixP->fx_r_type = BFD_RELOC_32_PCREL;
11589 	  break;
11590 	case BFD_RELOC_16:
11591 	  fixP->fx_r_type = BFD_RELOC_16_PCREL;
11592 	  break;
11593 	case BFD_RELOC_8:
11594 	  fixP->fx_r_type = BFD_RELOC_8_PCREL;
11595 	  break;
11596 	}
11597     }
11598 
11599   if (fixP->fx_addsy != NULL
11600       && (fixP->fx_r_type == BFD_RELOC_32_PCREL
11601 	  || fixP->fx_r_type == BFD_RELOC_64_PCREL
11602 	  || fixP->fx_r_type == BFD_RELOC_16_PCREL
11603 	  || fixP->fx_r_type == BFD_RELOC_8_PCREL)
11604       && !use_rela_relocations)
11605     {
11606       /* This is a hack.  There should be a better way to handle this.
11607 	 This covers for the fact that bfd_install_relocation will
11608 	 subtract the current location (for partial_inplace, PC relative
11609 	 relocations); see more below.  */
11610 #ifndef OBJ_AOUT
11611       if (IS_ELF
11612 #ifdef TE_PE
11613 	  || OUTPUT_FLAVOR == bfd_target_coff_flavour
11614 #endif
11615 	  )
11616 	value += fixP->fx_where + fixP->fx_frag->fr_address;
11617 #endif
11618 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11619       if (IS_ELF)
11620 	{
11621 	  segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
11622 
11623 	  if ((sym_seg == seg
11624 	       || (symbol_section_p (fixP->fx_addsy)
11625 		   && sym_seg != absolute_section))
11626 	      && !generic_force_reloc (fixP))
11627 	    {
11628 	      /* Yes, we add the values in twice.  This is because
11629 		 bfd_install_relocation subtracts them out again.  I think
11630 		 bfd_install_relocation is broken, but I don't dare change
11631 		 it.  FIXME.  */
11632 	      value += fixP->fx_where + fixP->fx_frag->fr_address;
11633 	    }
11634 	}
11635 #endif
11636 #if defined (OBJ_COFF) && defined (TE_PE)
11637       /* For some reason, the PE format does not store a
11638 	 section address offset for a PC relative symbol.  */
11639       if (S_GET_SEGMENT (fixP->fx_addsy) != seg
11640 	  || S_IS_WEAK (fixP->fx_addsy))
11641 	value += md_pcrel_from (fixP);
11642 #endif
11643     }
11644 #if defined (OBJ_COFF) && defined (TE_PE)
11645   if (fixP->fx_addsy != NULL
11646       && S_IS_WEAK (fixP->fx_addsy)
11647       /* PR 16858: Do not modify weak function references.  */
11648       && ! fixP->fx_pcrel)
11649     {
11650 #if !defined (TE_PEP)
11651       /* For x86 PE weak function symbols are neither PC-relative
11652 	 nor do they set S_IS_FUNCTION.  So the only reliable way
11653 	 to detect them is to check the flags of their containing
11654 	 section.  */
11655       if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
11656 	  && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
11657 	;
11658       else
11659 #endif
11660       value -= S_GET_VALUE (fixP->fx_addsy);
11661     }
11662 #endif
11663 
11664   /* Fix a few things - the dynamic linker expects certain values here,
11665      and we must not disappoint it.  */
11666 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11667   if (IS_ELF && fixP->fx_addsy)
11668     switch (fixP->fx_r_type)
11669       {
11670       case BFD_RELOC_386_PLT32:
11671       case BFD_RELOC_X86_64_PLT32:
11672 	/* Make the jump instruction point to the address of the operand.
11673 	   At runtime we merely add the offset to the actual PLT entry.
11674 	   NB: Subtract the offset size only for jump instructions.  */
11675 	if (fixP->fx_pcrel)
11676 	  value = -4;
11677 	break;
11678 
11679       case BFD_RELOC_386_TLS_GD:
11680       case BFD_RELOC_386_TLS_LDM:
11681       case BFD_RELOC_386_TLS_IE_32:
11682       case BFD_RELOC_386_TLS_IE:
11683       case BFD_RELOC_386_TLS_GOTIE:
11684       case BFD_RELOC_386_TLS_GOTDESC:
11685       case BFD_RELOC_X86_64_TLSGD:
11686       case BFD_RELOC_X86_64_TLSLD:
11687       case BFD_RELOC_X86_64_GOTTPOFF:
11688       case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
11689 	value = 0; /* Fully resolved at runtime.  No addend.  */
11690 	/* Fallthrough */
11691       case BFD_RELOC_386_TLS_LE:
11692       case BFD_RELOC_386_TLS_LDO_32:
11693       case BFD_RELOC_386_TLS_LE_32:
11694       case BFD_RELOC_X86_64_DTPOFF32:
11695       case BFD_RELOC_X86_64_DTPOFF64:
11696       case BFD_RELOC_X86_64_TPOFF32:
11697       case BFD_RELOC_X86_64_TPOFF64:
11698 	S_SET_THREAD_LOCAL (fixP->fx_addsy);
11699 	break;
11700 
11701       case BFD_RELOC_386_TLS_DESC_CALL:
11702       case BFD_RELOC_X86_64_TLSDESC_CALL:
11703 	value = 0; /* Fully resolved at runtime.  No addend.  */
11704 	S_SET_THREAD_LOCAL (fixP->fx_addsy);
11705 	fixP->fx_done = 0;
11706 	return;
11707 
11708       case BFD_RELOC_VTABLE_INHERIT:
11709       case BFD_RELOC_VTABLE_ENTRY:
11710 	fixP->fx_done = 0;
11711 	return;
11712 
11713       default:
11714 	break;
11715       }
11716 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */
11717   *valP = value;
11718 #endif /* !defined (TE_Mach)  */
11719 
11720   /* Are we finished with this relocation now?  */
11721   if (fixP->fx_addsy == NULL)
11722     fixP->fx_done = 1;
11723 #if defined (OBJ_COFF) && defined (TE_PE)
11724   else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
11725     {
11726       fixP->fx_done = 0;
11727       /* Remember value for tc_gen_reloc.  */
11728       fixP->fx_addnumber = value;
11729       /* Clear out the frag for now.  */
11730       value = 0;
11731     }
11732 #endif
11733   else if (use_rela_relocations)
11734     {
11735       fixP->fx_no_overflow = 1;
11736       /* Remember value for tc_gen_reloc.  */
11737       fixP->fx_addnumber = value;
11738       value = 0;
11739     }
11740 
11741   md_number_to_chars (p, value, fixP->fx_size);
11742 }
11743 
/* Floating-point literal conversion hook.  All of the work is delegated
   to ieee_md_atof: TYPE, LITP and SIZEP are passed through unchanged and
   its return value is returned as-is.  */

const char *
md_atof (int type, char *litP, int *sizeP)
{
  /* NOTE(review): the final FALSE presumably requests little-endian
     LITTLENUM order.  The 386 is a little-endian target, so the
     historical wording "bigendian 386" that used to sit here was
     misleading.  Confirm against ieee_md_atof's prototype.  */
  return ieee_md_atof (type, litP, sizeP, FALSE);
}
11751 
/* Scratch buffer shared by every call to output_invalid.  The longest
   rendering is "(0xff)" plus the terminating NUL, i.e. 7 bytes.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Render character C for use in a diagnostic: quoted when it is
   printable, otherwise as a parenthesised hexadecimal byte.  Returns a
   pointer to a static buffer which the next call overwrites.  */

static char *
output_invalid (int c)
{
  char *const buf = output_invalid_buf;
  const size_t buf_len = sizeof (output_invalid_buf);

  if (!ISPRINT (c))
    {
      snprintf (buf, buf_len, "(0x%x)", (unsigned char) c);
      return buf;
    }

  snprintf (buf, buf_len, "'%c'", c);
  return buf;
}
11765 
/* Parse a register name and return its i386_regtab entry, or NULL if
   REG_STRING does not name a register that is usable under the current
   CPU flags, code size and syntax settings.

   REG_STRING starts *before* REGISTER_PREFIX.  A leading REGISTER_PREFIX
   and one following whitespace character are skipped when present.  On
   success *END_OP points just past the register text.  Note that *END_OP
   is stored as soon as the name has been scanned, so it may also have
   been written when one of the later checks returns NULL.

   Side effect: i.vec_encoding is set to vex_encoding_evex once a
   register carrying RegVRex has passed the AVX512F / 64-bit check.  */

static const reg_entry *
parse_real_register (char *reg_string, char **end_op)
{
  char *s = reg_string;
  char *p;
  char reg_name_given[MAX_REG_NAME_SIZE + 1];
  const reg_entry *r;

  /* Skip possible REGISTER_PREFIX and possible whitespace.  */
  if (*s == REGISTER_PREFIX)
    ++s;

  if (is_space_char (*s))
    ++s;

  /* Copy the name into reg_name_given, translating every character
     through register_chars.  A zero table entry both terminates the
     copy and marks the first character that is not part of the name.
     Give up if the name does not fit into the buffer.  */
  p = reg_name_given;
  while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
    {
      if (p >= reg_name_given + MAX_REG_NAME_SIZE)
	return (const reg_entry *) NULL;
      s++;
    }

  /* For naked regs, make sure that we are not dealing with an identifier.
     This prevents confusing an identifier like `eax_var' with register
     `eax'.  */
  if (allow_naked_reg && identifier_chars[(unsigned char) *s])
    return (const reg_entry *) NULL;

  /* Stored before the lookup: stays set even if a check below fails.  */
  *end_op = s;

  r = (const reg_entry *) hash_find (reg_hash, reg_name_given);

  /* Handle floating point regs, allowing spaces in the (i) part.  */
  if (r == i386_regtab /* %st is first entry of table  */)
    {
      /* The FP stack registers need some x87 support to be enabled.  */
      if (!cpu_arch_flags.bitfield.cpu8087
	  && !cpu_arch_flags.bitfield.cpu287
	  && !cpu_arch_flags.bitfield.cpu387)
	return (const reg_entry *) NULL;

      if (is_space_char (*s))
	++s;
      if (*s == '(')
	{
	  ++s;
	  if (is_space_char (*s))
	    ++s;
	  if (*s >= '0' && *s <= '7')
	    {
	      int fpr = *s - '0';
	      ++s;
	      if (is_space_char (*s))
		++s;
	      if (*s == ')')
		{
		  /* The result is the "st(0)" entry advanced by the
		     stack index that was parsed.  */
		  *end_op = s + 1;
		  r = (const reg_entry *) hash_find (reg_hash, "st(0)");
		  know (r);
		  return r + fpr;
		}
	    }
	  /* We have "%st(" then garbage.  */
	  return (const reg_entry *) NULL;
	}
      /* A plain "%st" without "(" falls through to the checks below.  */
    }

  /* Unknown names fail here; with allow_pseudo_reg set, whatever was
     found is returned without any of the filtering below.  */
  if (r == NULL || allow_pseudo_reg)
    return r;

  /* An entry whose operand type is all zero is rejected.  */
  if (operand_type_all_zero (&r->reg_type))
    return (const reg_entry *) NULL;

  /* 32-bit registers, segment registers numbered above 3, and control,
     debug and test registers all require the cpui386 flag.  */
  if ((r->reg_type.bitfield.dword
       || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
       || r->reg_type.bitfield.class == RegCR
       || r->reg_type.bitfield.class == RegDR
       || r->reg_type.bitfield.class == RegTR)
      && !cpu_arch_flags.bitfield.cpui386)
    return (const reg_entry *) NULL;

  if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
    return (const reg_entry *) NULL;

  /* Vector registers: the checks are nested so that an enabled wider
     extension also admits the narrower register widths.  */
  if (!cpu_arch_flags.bitfield.cpuavx512f)
    {
      if (r->reg_type.bitfield.zmmword
	  || r->reg_type.bitfield.class == RegMask)
	return (const reg_entry *) NULL;

      if (!cpu_arch_flags.bitfield.cpuavx)
	{
	  if (r->reg_type.bitfield.ymmword)
	    return (const reg_entry *) NULL;

	  if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
	    return (const reg_entry *) NULL;
	}
    }

  if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
    return (const reg_entry *) NULL;

  /* Don't allow fake index register unless allow_index_reg isn't 0. */
  if (!allow_index_reg && r->reg_num == RegIZ)
    return (const reg_entry *) NULL;

  /* Upper 16 vector registers are only available with VREX in 64bit
     mode, and require EVEX encoding.  */
  if (r->reg_flags & RegVRex)
    {
      if (!cpu_arch_flags.bitfield.cpuavx512f
	  || flag_code != CODE_64BIT)
	return (const reg_entry *) NULL;

      i.vec_encoding = vex_encoding_evex;
    }

  /* Outside 64-bit mode, REX-flagged and 64-bit registers are rejected;
     the only exception is a control register while cpulm is set.  */
  if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
      && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
      && flag_code != CODE_64BIT)
    return (const reg_entry *) NULL;

  /* The RegFlat segment register is only accepted in Intel syntax.  */
  if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
      && !intel_syntax)
    return (const reg_entry *) NULL;

  return r;
}
11897 
11898 /* REG_STRING starts *before* REGISTER_PREFIX.  */
11899 
11900 static const reg_entry *
11901 parse_register (char *reg_string, char **end_op)
11902 {
11903   const reg_entry *r;
11904 
11905   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
11906     r = parse_real_register (reg_string, end_op);
11907   else
11908     r = NULL;
11909   if (!r)
11910     {
11911       char *save = input_line_pointer;
11912       char c;
11913       symbolS *symbolP;
11914 
11915       input_line_pointer = reg_string;
11916       c = get_symbol_name (&reg_string);
11917       symbolP = symbol_find (reg_string);
11918       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
11919 	{
11920 	  const expressionS *e = symbol_get_value_expression (symbolP);
11921 
11922 	  know (e->X_op == O_register);
11923 	  know (e->X_add_number >= 0
11924 		&& (valueT) e->X_add_number < i386_regtab_size);
11925 	  r = i386_regtab + e->X_add_number;
11926 	  if ((r->reg_flags & RegVRex))
11927 	    i.vec_encoding = vex_encoding_evex;
11928 	  *end_op = input_line_pointer;
11929 	}
11930       *input_line_pointer = c;
11931       input_line_pointer = save;
11932     }
11933   return r;
11934 }
11935 
11936 int
11937 i386_parse_name (char *name, expressionS *e, char *nextcharP)
11938 {
11939   const reg_entry *r;
11940   char *end = input_line_pointer;
11941 
11942   *end = *nextcharP;
11943   r = parse_register (name, &input_line_pointer);
11944   if (r && end <= input_line_pointer)
11945     {
11946       *nextcharP = *input_line_pointer;
11947       *input_line_pointer = 0;
11948       e->X_op = O_register;
11949       e->X_add_number = r - i386_regtab;
11950       return 1;
11951     }
11952   input_line_pointer = end;
11953   *end = 0;
11954   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
11955 }
11956 
11957 void
11958 md_operand (expressionS *e)
11959 {
11960   char *end;
11961   const reg_entry *r;
11962 
11963   switch (*input_line_pointer)
11964     {
11965     case REGISTER_PREFIX:
11966       r = parse_real_register (input_line_pointer, &end);
11967       if (r)
11968 	{
11969 	  e->X_op = O_register;
11970 	  e->X_add_number = r - i386_regtab;
11971 	  input_line_pointer = end;
11972 	}
11973       break;
11974 
11975     case '[':
11976       gas_assert (intel_syntax);
11977       end = input_line_pointer++;
11978       expression (e);
11979       if (*input_line_pointer == ']')
11980 	{
11981 	  ++input_line_pointer;
11982 	  e->X_op_symbol = make_expr_symbol (e);
11983 	  e->X_add_symbol = NULL;
11984 	  e->X_add_number = 0;
11985 	  e->X_op = O_index;
11986 	}
11987       else
11988 	{
11989 	  e->X_op = O_absent;
11990 	  input_line_pointer = end;
11991 	}
11992       break;
11993     }
11994 }
11995 
11996 
/* Single-letter options accepted by this target, written as a
   getopt-style option string: a letter followed by ':' takes an argument
   (-Q), and 'O::' lets -O come with or without one, which is why
   md_parse_option tests its ARG for NULL.  ELF-capable builds additionally
   accept -k, -V, -Q and -s; md_parse_option handles those under the same
   preprocessor condition.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
const char *md_shortopts = "kVQ:sqnO::";
#else
const char *md_shortopts = "qnO::";
#endif
12002 
/* Codes for the long options, numbered consecutively starting at
   OPTION_MD_BASE.  Each code appears as the value field of an entry in
   md_longopts and as a case label in md_parse_option.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
/* Note the spelling: this is the code for the option whose text is
   "mbranches-within-32B-boundaries".  */
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
12034 
/* Table of the target-specific long options.  Each entry gives the
   option text, whether it takes an argument, and the OPTION_* code that
   md_parse_option switches on.  Some entries exist only for particular
   object formats / target environments, as selected by the preprocessor
   conditions below.  The all-zero entry terminates the table.  */
struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
  {"64", no_argument, NULL, OPTION_64},
#endif
  /* ELF-only options.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"x32", no_argument, NULL, OPTION_X32},
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
  /* PE-only option.  */
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  {NULL, no_argument, NULL, 0}
};
/* Size of the table above in bytes (not the number of entries).  */
size_t md_longopts_size = sizeof (md_longopts);
12078 
12079 int
12080 md_parse_option (int c, const char *arg)
12081 {
12082   unsigned int j;
12083   char *arch, *next, *saved, *type;
12084 
12085   switch (c)
12086     {
12087     case 'n':
12088       optimize_align_code = 0;
12089       break;
12090 
12091     case 'q':
12092       quiet_warnings = 1;
12093       break;
12094 
12095 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12096       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
12097 	 should be emitted or not.  FIXME: Not implemented.  */
12098     case 'Q':
12099       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
12100 	return 0;
12101       break;
12102 
12103       /* -V: SVR4 argument to print version ID.  */
12104     case 'V':
12105       print_version_id ();
12106       break;
12107 
12108       /* -k: Ignore for FreeBSD compatibility.  */
12109     case 'k':
12110       break;
12111 
12112     case 's':
12113       /* -s: On i386 Solaris, this tells the native assembler to use
12114 	 .stab instead of .stab.excl.  We always use .stab anyhow.  */
12115       break;
12116 
12117     case OPTION_MSHARED:
12118       shared = 1;
12119       break;
12120 
12121     case OPTION_X86_USED_NOTE:
12122       if (strcasecmp (arg, "yes") == 0)
12123         x86_used_note = 1;
12124       else if (strcasecmp (arg, "no") == 0)
12125         x86_used_note = 0;
12126       else
12127         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
12128       break;
12129 
12130 
12131 #endif
12132 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
12133      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
12134     case OPTION_64:
12135       {
12136 	const char **list, **l;
12137 
12138 	list = bfd_target_list ();
12139 	for (l = list; *l != NULL; l++)
12140 	  if (CONST_STRNEQ (*l, "elf64-x86-64")
12141 	      || strcmp (*l, "coff-x86-64") == 0
12142 	      || strcmp (*l, "pe-x86-64") == 0
12143 	      || strcmp (*l, "pei-x86-64") == 0
12144 	      || strcmp (*l, "mach-o-x86-64") == 0)
12145 	    {
12146 	      default_arch = "x86_64";
12147 	      break;
12148 	    }
12149 	if (*l == NULL)
12150 	  as_fatal (_("no compiled in support for x86_64"));
12151 	free (list);
12152       }
12153       break;
12154 #endif
12155 
12156 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12157     case OPTION_X32:
12158       if (IS_ELF)
12159 	{
12160 	  const char **list, **l;
12161 
12162 	  list = bfd_target_list ();
12163 	  for (l = list; *l != NULL; l++)
12164 	    if (CONST_STRNEQ (*l, "elf32-x86-64"))
12165 	      {
12166 		default_arch = "x86_64:32";
12167 		break;
12168 	      }
12169 	  if (*l == NULL)
12170 	    as_fatal (_("no compiled in support for 32bit x86_64"));
12171 	  free (list);
12172 	}
12173       else
12174 	as_fatal (_("32bit x86_64 is only supported for ELF"));
12175       break;
12176 #endif
12177 
12178     case OPTION_32:
12179       default_arch = "i386";
12180       break;
12181 
12182     case OPTION_DIVIDE:
12183 #ifdef SVR4_COMMENT_CHARS
12184       {
12185 	char *n, *t;
12186 	const char *s;
12187 
12188 	n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
12189 	t = n;
12190 	for (s = i386_comment_chars; *s != '\0'; s++)
12191 	  if (*s != '/')
12192 	    *t++ = *s;
12193 	*t = '\0';
12194 	i386_comment_chars = n;
12195       }
12196 #endif
12197       break;
12198 
12199     case OPTION_MARCH:
12200       saved = xstrdup (arg);
12201       arch = saved;
12202       /* Allow -march=+nosse.  */
12203       if (*arch == '+')
12204 	arch++;
12205       do
12206 	{
12207 	  if (*arch == '.')
12208 	    as_fatal (_("invalid -march= option: `%s'"), arg);
12209 	  next = strchr (arch, '+');
12210 	  if (next)
12211 	    *next++ = '\0';
12212 	  for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12213 	    {
12214 	      if (strcmp (arch, cpu_arch [j].name) == 0)
12215 		{
12216 		  /* Processor.  */
12217 		  if (! cpu_arch[j].flags.bitfield.cpui386)
12218 		    continue;
12219 
12220 		  cpu_arch_name = cpu_arch[j].name;
12221 		  cpu_sub_arch_name = NULL;
12222 		  cpu_arch_flags = cpu_arch[j].flags;
12223 		  cpu_arch_isa = cpu_arch[j].type;
12224 		  cpu_arch_isa_flags = cpu_arch[j].flags;
12225 		  if (!cpu_arch_tune_set)
12226 		    {
12227 		      cpu_arch_tune = cpu_arch_isa;
12228 		      cpu_arch_tune_flags = cpu_arch_isa_flags;
12229 		    }
12230 		  break;
12231 		}
12232 	      else if (*cpu_arch [j].name == '.'
12233 		       && strcmp (arch, cpu_arch [j].name + 1) == 0)
12234 		{
12235 		  /* ISA extension.  */
12236 		  i386_cpu_flags flags;
12237 
12238 		  flags = cpu_flags_or (cpu_arch_flags,
12239 					cpu_arch[j].flags);
12240 
12241 		  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
12242 		    {
12243 		      if (cpu_sub_arch_name)
12244 			{
12245 			  char *name = cpu_sub_arch_name;
12246 			  cpu_sub_arch_name = concat (name,
12247 						      cpu_arch[j].name,
12248 						      (const char *) NULL);
12249 			  free (name);
12250 			}
12251 		      else
12252 			cpu_sub_arch_name = xstrdup (cpu_arch[j].name);
12253 		      cpu_arch_flags = flags;
12254 		      cpu_arch_isa_flags = flags;
12255 		    }
12256 		  else
12257 		    cpu_arch_isa_flags
12258 		      = cpu_flags_or (cpu_arch_isa_flags,
12259 				      cpu_arch[j].flags);
12260 		  break;
12261 		}
12262 	    }
12263 
12264 	  if (j >= ARRAY_SIZE (cpu_arch))
12265 	    {
12266 	      /* Disable an ISA extension.  */
12267 	      for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
12268 		if (strcmp (arch, cpu_noarch [j].name) == 0)
12269 		  {
12270 		    i386_cpu_flags flags;
12271 
12272 		    flags = cpu_flags_and_not (cpu_arch_flags,
12273 					       cpu_noarch[j].flags);
12274 		    if (!cpu_flags_equal (&flags, &cpu_arch_flags))
12275 		      {
12276 			if (cpu_sub_arch_name)
12277 			  {
12278 			    char *name = cpu_sub_arch_name;
12279 			    cpu_sub_arch_name = concat (arch,
12280 							(const char *) NULL);
12281 			    free (name);
12282 			  }
12283 			else
12284 			  cpu_sub_arch_name = xstrdup (arch);
12285 			cpu_arch_flags = flags;
12286 			cpu_arch_isa_flags = flags;
12287 		      }
12288 		    break;
12289 		  }
12290 
12291 	      if (j >= ARRAY_SIZE (cpu_noarch))
12292 		j = ARRAY_SIZE (cpu_arch);
12293 	    }
12294 
12295 	  if (j >= ARRAY_SIZE (cpu_arch))
12296 	    as_fatal (_("invalid -march= option: `%s'"), arg);
12297 
12298 	  arch = next;
12299 	}
12300       while (next != NULL);
12301       free (saved);
12302       break;
12303 
12304     case OPTION_MTUNE:
12305       if (*arg == '.')
12306 	as_fatal (_("invalid -mtune= option: `%s'"), arg);
12307       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12308 	{
12309 	  if (strcmp (arg, cpu_arch [j].name) == 0)
12310 	    {
12311 	      cpu_arch_tune_set = 1;
12312 	      cpu_arch_tune = cpu_arch [j].type;
12313 	      cpu_arch_tune_flags = cpu_arch[j].flags;
12314 	      break;
12315 	    }
12316 	}
12317       if (j >= ARRAY_SIZE (cpu_arch))
12318 	as_fatal (_("invalid -mtune= option: `%s'"), arg);
12319       break;
12320 
12321     case OPTION_MMNEMONIC:
12322       if (strcasecmp (arg, "att") == 0)
12323 	intel_mnemonic = 0;
12324       else if (strcasecmp (arg, "intel") == 0)
12325 	intel_mnemonic = 1;
12326       else
12327 	as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
12328       break;
12329 
12330     case OPTION_MSYNTAX:
12331       if (strcasecmp (arg, "att") == 0)
12332 	intel_syntax = 0;
12333       else if (strcasecmp (arg, "intel") == 0)
12334 	intel_syntax = 1;
12335       else
12336 	as_fatal (_("invalid -msyntax= option: `%s'"), arg);
12337       break;
12338 
12339     case OPTION_MINDEX_REG:
12340       allow_index_reg = 1;
12341       break;
12342 
12343     case OPTION_MNAKED_REG:
12344       allow_naked_reg = 1;
12345       break;
12346 
12347     case OPTION_MSSE2AVX:
12348       sse2avx = 1;
12349       break;
12350 
12351     case OPTION_MSSE_CHECK:
12352       if (strcasecmp (arg, "error") == 0)
12353 	sse_check = check_error;
12354       else if (strcasecmp (arg, "warning") == 0)
12355 	sse_check = check_warning;
12356       else if (strcasecmp (arg, "none") == 0)
12357 	sse_check = check_none;
12358       else
12359 	as_fatal (_("invalid -msse-check= option: `%s'"), arg);
12360       break;
12361 
12362     case OPTION_MOPERAND_CHECK:
12363       if (strcasecmp (arg, "error") == 0)
12364 	operand_check = check_error;
12365       else if (strcasecmp (arg, "warning") == 0)
12366 	operand_check = check_warning;
12367       else if (strcasecmp (arg, "none") == 0)
12368 	operand_check = check_none;
12369       else
12370 	as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
12371       break;
12372 
12373     case OPTION_MAVXSCALAR:
12374       if (strcasecmp (arg, "128") == 0)
12375 	avxscalar = vex128;
12376       else if (strcasecmp (arg, "256") == 0)
12377 	avxscalar = vex256;
12378       else
12379 	as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
12380       break;
12381 
12382     case OPTION_MVEXWIG:
12383       if (strcmp (arg, "0") == 0)
12384 	vexwig = vexw0;
12385       else if (strcmp (arg, "1") == 0)
12386 	vexwig = vexw1;
12387       else
12388 	as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
12389       break;
12390 
12391     case OPTION_MADD_BND_PREFIX:
12392       add_bnd_prefix = 1;
12393       break;
12394 
12395     case OPTION_MEVEXLIG:
12396       if (strcmp (arg, "128") == 0)
12397 	evexlig = evexl128;
12398       else if (strcmp (arg, "256") == 0)
12399 	evexlig = evexl256;
12400       else  if (strcmp (arg, "512") == 0)
12401 	evexlig = evexl512;
12402       else
12403 	as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
12404       break;
12405 
12406     case OPTION_MEVEXRCIG:
12407       if (strcmp (arg, "rne") == 0)
12408 	evexrcig = rne;
12409       else if (strcmp (arg, "rd") == 0)
12410 	evexrcig = rd;
12411       else if (strcmp (arg, "ru") == 0)
12412 	evexrcig = ru;
12413       else if (strcmp (arg, "rz") == 0)
12414 	evexrcig = rz;
12415       else
12416 	as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
12417       break;
12418 
12419     case OPTION_MEVEXWIG:
12420       if (strcmp (arg, "0") == 0)
12421 	evexwig = evexw0;
12422       else if (strcmp (arg, "1") == 0)
12423 	evexwig = evexw1;
12424       else
12425 	as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
12426       break;
12427 
12428 # if defined (TE_PE) || defined (TE_PEP)
12429     case OPTION_MBIG_OBJ:
12430       use_big_obj = 1;
12431       break;
12432 #endif
12433 
12434     case OPTION_MOMIT_LOCK_PREFIX:
12435       if (strcasecmp (arg, "yes") == 0)
12436         omit_lock_prefix = 1;
12437       else if (strcasecmp (arg, "no") == 0)
12438         omit_lock_prefix = 0;
12439       else
12440         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
12441       break;
12442 
12443     case OPTION_MFENCE_AS_LOCK_ADD:
12444       if (strcasecmp (arg, "yes") == 0)
12445         avoid_fence = 1;
12446       else if (strcasecmp (arg, "no") == 0)
12447         avoid_fence = 0;
12448       else
12449         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
12450       break;
12451 
12452     case OPTION_MRELAX_RELOCATIONS:
12453       if (strcasecmp (arg, "yes") == 0)
12454         generate_relax_relocations = 1;
12455       else if (strcasecmp (arg, "no") == 0)
12456         generate_relax_relocations = 0;
12457       else
12458         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
12459       break;
12460 
12461     case OPTION_MALIGN_BRANCH_BOUNDARY:
12462       {
12463 	char *end;
12464 	long int align = strtoul (arg, &end, 0);
12465 	if (*end == '\0')
12466 	  {
12467 	    if (align == 0)
12468 	      {
12469 		align_branch_power = 0;
12470 		break;
12471 	      }
12472 	    else if (align >= 16)
12473 	      {
12474 		int align_power;
12475 		for (align_power = 0;
12476 		     (align & 1) == 0;
12477 		     align >>= 1, align_power++)
12478 		  continue;
12479 		/* Limit alignment power to 31.  */
12480 		if (align == 1 && align_power < 32)
12481 		  {
12482 		    align_branch_power = align_power;
12483 		    break;
12484 		  }
12485 	      }
12486 	  }
12487 	as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
12488       }
12489       break;
12490 
12491     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
12492       {
12493 	char *end;
12494 	int align = strtoul (arg, &end, 0);
12495 	/* Some processors only support 5 prefixes.  */
12496 	if (*end == '\0' && align >= 0 && align < 6)
12497 	  {
12498 	    align_branch_prefix_size = align;
12499 	    break;
12500 	  }
12501 	as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
12502 		  arg);
12503       }
12504       break;
12505 
12506     case OPTION_MALIGN_BRANCH:
12507       align_branch = 0;
12508       saved = xstrdup (arg);
12509       type = saved;
12510       do
12511 	{
12512 	  next = strchr (type, '+');
12513 	  if (next)
12514 	    *next++ = '\0';
12515 	  if (strcasecmp (type, "jcc") == 0)
12516 	    align_branch |= align_branch_jcc_bit;
12517 	  else if (strcasecmp (type, "fused") == 0)
12518 	    align_branch |= align_branch_fused_bit;
12519 	  else if (strcasecmp (type, "jmp") == 0)
12520 	    align_branch |= align_branch_jmp_bit;
12521 	  else if (strcasecmp (type, "call") == 0)
12522 	    align_branch |= align_branch_call_bit;
12523 	  else if (strcasecmp (type, "ret") == 0)
12524 	    align_branch |= align_branch_ret_bit;
12525 	  else if (strcasecmp (type, "indirect") == 0)
12526 	    align_branch |= align_branch_indirect_bit;
12527 	  else
12528 	    as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
12529 	  type = next;
12530 	}
12531       while (next != NULL);
12532       free (saved);
12533       break;
12534 
12535     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
12536       align_branch_power = 5;
12537       align_branch_prefix_size = 5;
12538       align_branch = (align_branch_jcc_bit
12539 		      | align_branch_fused_bit
12540 		      | align_branch_jmp_bit);
12541       break;
12542 
12543     case OPTION_MAMD64:
12544       intel64 = 0;
12545       break;
12546 
12547     case OPTION_MINTEL64:
12548       intel64 = 1;
12549       break;
12550 
12551     case 'O':
12552       if (arg == NULL)
12553 	{
12554 	  optimize = 1;
12555 	  /* Turn off -Os.  */
12556 	  optimize_for_space = 0;
12557 	}
12558       else if (*arg == 's')
12559 	{
12560 	  optimize_for_space = 1;
12561 	  /* Turn on all encoding optimizations.  */
12562 	  optimize = INT_MAX;
12563 	}
12564       else
12565 	{
12566 	  optimize = atoi (arg);
12567 	  /* Turn off -Os.  */
12568 	  optimize_for_space = 0;
12569 	}
12570       break;
12571 
12572     default:
12573       return 0;
12574     }
12575   return 1;
12576 }
12577 
12578 #define MESSAGE_TEMPLATE \
12579 "                                                                                "
12580 
12581 static char *
12582 output_message (FILE *stream, char *p, char *message, char *start,
12583 		int *left_p, const char *name, int len)
12584 {
12585   int size = sizeof (MESSAGE_TEMPLATE);
12586   int left = *left_p;
12587 
12588   /* Reserve 2 spaces for ", " or ",\0" */
12589   left -= len + 2;
12590 
12591   /* Check if there is any room.  */
12592   if (left >= 0)
12593     {
12594       if (p != start)
12595 	{
12596 	  *p++ = ',';
12597 	  *p++ = ' ';
12598 	}
12599       p = mempcpy (p, name, len);
12600     }
12601   else
12602     {
12603       /* Output the current message now and start a new one.  */
12604       *p++ = ',';
12605       *p = '\0';
12606       fprintf (stream, "%s\n", message);
12607       p = start;
12608       left = size - (start - message) - len - 2;
12609 
12610       gas_assert (left >= 0);
12611 
12612       p = mempcpy (p, name, len);
12613     }
12614 
12615   *left_p = left;
12616   return p;
12617 }
12618 
12619 static void
12620 show_arch (FILE *stream, int ext, int check)
12621 {
12622   static char message[] = MESSAGE_TEMPLATE;
12623   char *start = message + 27;
12624   char *p;
12625   int size = sizeof (MESSAGE_TEMPLATE);
12626   int left;
12627   const char *name;
12628   int len;
12629   unsigned int j;
12630 
12631   p = start;
12632   left = size - (start - message);
12633   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
12634     {
12635       /* Should it be skipped?  */
12636       if (cpu_arch [j].skip)
12637 	continue;
12638 
12639       name = cpu_arch [j].name;
12640       len = cpu_arch [j].len;
12641       if (*name == '.')
12642 	{
12643 	  /* It is an extension.  Skip if we aren't asked to show it.  */
12644 	  if (ext)
12645 	    {
12646 	      name++;
12647 	      len--;
12648 	    }
12649 	  else
12650 	    continue;
12651 	}
12652       else if (ext)
12653 	{
12654 	  /* It is an processor.  Skip if we show only extension.  */
12655 	  continue;
12656 	}
12657       else if (check && ! cpu_arch[j].flags.bitfield.cpui386)
12658 	{
12659 	  /* It is an impossible processor - skip.  */
12660 	  continue;
12661 	}
12662 
12663       p = output_message (stream, p, message, start, &left, name, len);
12664     }
12665 
12666   /* Display disabled extensions.  */
12667   if (ext)
12668     for (j = 0; j < ARRAY_SIZE (cpu_noarch); j++)
12669       {
12670 	name = cpu_noarch [j].name;
12671 	len = cpu_noarch [j].len;
12672 	p = output_message (stream, p, message, start, &left, name,
12673 			    len);
12674       }
12675 
12676   *p = '\0';
12677   fprintf (stream, "%s\n", message);
12678 }
12679 
/* Print the i386-specific command line option summary to STREAM.

   Every message is passed through _() on its own, so each string
   literal below is a separate translatable unit.  Which messages are
   printed depends on the object-format and target macros tested by the
   preprocessor conditionals; a few defaults are chosen at run time from
   SYSV386_COMPAT, DEFAULT_X86_USED_NOTE and
   DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */

void
md_show_usage (FILE *stream)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -Qy, -Qn                ignored\n\
  -V                      print assembler version number\n\
  -k                      ignored\n"));
#endif
  fprintf (stream, _("\
  -n                      Do not optimize code alignment\n\
  -q                      quieten some warnings\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -s                      ignored\n"));
#endif
#if defined BFD64 && (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
		      || defined (TE_PE) || defined (TE_PEP))
  fprintf (stream, _("\
  --32/--64/--x32         generate 32bit/64bit/x32 code\n"));
#endif
#ifdef SVR4_COMMENT_CHARS
  fprintf (stream, _("\
  --divide                do not treat `/' as a comment character\n"));
#else
  fprintf (stream, _("\
  --divide                ignored\n"));
#endif
  fprintf (stream, _("\
  -march=CPU[,+EXTENSION...]\n\
                          generate code for CPU and EXTENSION, CPU is one of:\n"));
  /* Processors only (ext == 0), leaving out those without cpui386
     (check == 1).  */
  show_arch (stream, 0, 1);
  fprintf (stream, _("\
                          EXTENSION is combination of:\n"));
  /* Extensions only (ext == 1), including the cpu_noarch entries.  */
  show_arch (stream, 1, 0);
  fprintf (stream, _("\
  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
  /* Processors only, this time without the cpui386 filter.  */
  show_arch (stream, 0, 0);
  fprintf (stream, _("\
  -msse2avx               encode SSE instructions with VEX prefix\n"));
  fprintf (stream, _("\
  -msse-check=[none|error|warning] (default: warning)\n\
                          check SSE instructions\n"));
  fprintf (stream, _("\
  -moperand-check=[none|error|warning] (default: warning)\n\
                          check operand combinations for validity\n"));
  fprintf (stream, _("\
  -mavxscalar=[128|256] (default: 128)\n\
                          encode scalar AVX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mvexwig=[0|1] (default: 0)\n\
                          encode VEX instructions with specific VEX.W value\n\
                           for VEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexlig=[128|256|512] (default: 128)\n\
                          encode scalar EVEX instructions with specific vector\n\
                           length\n"));
  fprintf (stream, _("\
  -mevexwig=[0|1] (default: 0)\n\
                          encode EVEX instructions with specific EVEX.W value\n\
                           for EVEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
                          encode EVEX instructions with specific EVEX.RC value\n\
                           for SAE-only ignored instructions\n"));
  /* The option line is completed by one of the two "(default: ...)"
     messages below, picked from SYSV386_COMPAT.  */
  fprintf (stream, _("\
  -mmnemonic=[att|intel] "));
  if (SYSV386_COMPAT)
    fprintf (stream, _("(default: att)\n"));
  else
    fprintf (stream, _("(default: intel)\n"));
  fprintf (stream, _("\
                          use AT&T/Intel mnemonic\n"));
  fprintf (stream, _("\
  -msyntax=[att|intel] (default: att)\n\
                          use AT&T/Intel syntax\n"));
  fprintf (stream, _("\
  -mindex-reg             support pseudo index registers\n"));
  /* `%%' is needed here because the text is used as a printf format.  */
  fprintf (stream, _("\
  -mnaked-reg             don't require `%%' prefix for registers\n"));
  fprintf (stream, _("\
  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -mshared                disable branch optimization for shared code\n"));
  /* Default text picked from DEFAULT_X86_USED_NOTE.  */
  fprintf (stream, _("\
  -mx86-used-note=[no|yes] "));
  if (DEFAULT_X86_USED_NOTE)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate x86 used ISA and feature properties\n"));
#endif
#if defined (TE_PE) || defined (TE_PEP)
  fprintf (stream, _("\
  -mbig-obj               generate big object files\n"));
#endif
  fprintf (stream, _("\
  -momit-lock-prefix=[no|yes] (default: no)\n\
                          strip all lock prefixes\n"));
  fprintf (stream, _("\
  -mfence-as-lock-add=[no|yes] (default: no)\n\
                          encode lfence, mfence and sfence as\n\
                           lock addl $0x0, (%%{re}sp)\n"));
  /* Default text picked from DEFAULT_GENERATE_X86_RELAX_RELOCATIONS.  */
  fprintf (stream, _("\
  -mrelax-relocations=[no|yes] "));
  if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate relax relocations\n"));
  fprintf (stream, _("\
  -malign-branch-boundary=NUM (default: 0)\n\
                          align branches within NUM byte boundary\n"));
  fprintf (stream, _("\
  -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
                          TYPE is combination of jcc, fused, jmp, call, ret,\n\
                           indirect\n\
                          specify types of branches to align\n"));
  fprintf (stream, _("\
  -malign-branch-prefix-size=NUM (default: 5)\n\
                          align branches with NUM prefixes per instruction\n"));
  fprintf (stream, _("\
  -mbranches-within-32B-boundaries\n\
                          align branches within 32 byte boundary\n"));
  fprintf (stream, _("\
  -mamd64                 accept only AMD64 ISA [default]\n"));
  fprintf (stream, _("\
  -mintel64               accept only Intel64 ISA\n"));
}
12813 
12814 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
12815      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
12816      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
12817 
12818 /* Pick the target format to use.  */
12819 
12820 const char *
12821 i386_target_format (void)
12822 {
12823   if (!strncmp (default_arch, "x86_64", 6))
12824     {
12825       update_code_flag (CODE_64BIT, 1);
12826       if (default_arch[6] == '\0')
12827 	x86_elf_abi = X86_64_ABI;
12828       else
12829 	x86_elf_abi = X86_64_X32_ABI;
12830     }
12831   else if (!strcmp (default_arch, "i386"))
12832     update_code_flag (CODE_32BIT, 1);
12833   else if (!strcmp (default_arch, "iamcu"))
12834     {
12835       update_code_flag (CODE_32BIT, 1);
12836       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
12837 	{
12838 	  static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
12839 	  cpu_arch_name = "iamcu";
12840 	  cpu_sub_arch_name = NULL;
12841 	  cpu_arch_flags = iamcu_flags;
12842 	  cpu_arch_isa = PROCESSOR_IAMCU;
12843 	  cpu_arch_isa_flags = iamcu_flags;
12844 	  if (!cpu_arch_tune_set)
12845 	    {
12846 	      cpu_arch_tune = cpu_arch_isa;
12847 	      cpu_arch_tune_flags = cpu_arch_isa_flags;
12848 	    }
12849 	}
12850       else if (cpu_arch_isa != PROCESSOR_IAMCU)
12851 	as_fatal (_("Intel MCU doesn't support `%s' architecture"),
12852 		  cpu_arch_name);
12853     }
12854   else
12855     as_fatal (_("unknown architecture"));
12856 
12857   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
12858     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].flags;
12859   if (cpu_flags_all_zero (&cpu_arch_tune_flags))
12860     cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].flags;
12861 
12862   switch (OUTPUT_FLAVOR)
12863     {
12864 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
12865     case bfd_target_aout_flavour:
12866       return AOUT_TARGET_FORMAT;
12867 #endif
12868 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
12869 # if defined (TE_PE) || defined (TE_PEP)
12870     case bfd_target_coff_flavour:
12871       if (flag_code == CODE_64BIT)
12872 	return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
12873       else
12874 	return "pe-i386";
12875 # elif defined (TE_GO32)
12876     case bfd_target_coff_flavour:
12877       return "coff-go32";
12878 # else
12879     case bfd_target_coff_flavour:
12880       return "coff-i386";
12881 # endif
12882 #endif
12883 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
12884     case bfd_target_elf_flavour:
12885       {
12886 	const char *format;
12887 
12888 	switch (x86_elf_abi)
12889 	  {
12890 	  default:
12891 	    format = ELF_TARGET_FORMAT;
12892 #ifndef TE_SOLARIS
12893 	    tls_get_addr = "___tls_get_addr";
12894 #endif
12895 	    break;
12896 	  case X86_64_ABI:
12897 	    use_rela_relocations = 1;
12898 	    object_64bit = 1;
12899 #ifndef TE_SOLARIS
12900 	    tls_get_addr = "__tls_get_addr";
12901 #endif
12902 	    format = ELF_TARGET_FORMAT64;
12903 	    break;
12904 	  case X86_64_X32_ABI:
12905 	    use_rela_relocations = 1;
12906 	    object_64bit = 1;
12907 #ifndef TE_SOLARIS
12908 	    tls_get_addr = "__tls_get_addr";
12909 #endif
12910 	    disallow_64bit_reloc = 1;
12911 	    format = ELF_TARGET_FORMAT32;
12912 	    break;
12913 	  }
12914 	if (cpu_arch_isa == PROCESSOR_L1OM)
12915 	  {
12916 	    if (x86_elf_abi != X86_64_ABI)
12917 	      as_fatal (_("Intel L1OM is 64bit only"));
12918 	    return ELF_TARGET_L1OM_FORMAT;
12919 	  }
12920 	else if (cpu_arch_isa == PROCESSOR_K1OM)
12921 	  {
12922 	    if (x86_elf_abi != X86_64_ABI)
12923 	      as_fatal (_("Intel K1OM is 64bit only"));
12924 	    return ELF_TARGET_K1OM_FORMAT;
12925 	  }
12926 	else if (cpu_arch_isa == PROCESSOR_IAMCU)
12927 	  {
12928 	    if (x86_elf_abi != I386_ABI)
12929 	      as_fatal (_("Intel MCU is 32bit only"));
12930 	    return ELF_TARGET_IAMCU_FORMAT;
12931 	  }
12932 	else
12933 	  return format;
12934       }
12935 #endif
12936 #if defined (OBJ_MACH_O)
12937     case bfd_target_mach_o_flavour:
12938       if (flag_code == CODE_64BIT)
12939 	{
12940 	  use_rela_relocations = 1;
12941 	  object_64bit = 1;
12942 	  return "mach-o-x86-64";
12943 	}
12944       else
12945 	return "mach-o-i386";
12946 #endif
12947     default:
12948       abort ();
12949       return NULL;
12950     }
12951 }
12952 
12953 #endif /* OBJ_MAYBE_ more than one  */
12954 
12955 symbolS *
12956 md_undefined_symbol (char *name)
12957 {
12958   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
12959       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
12960       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
12961       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
12962     {
12963       if (!GOT_symbol)
12964 	{
12965 	  if (symbol_find (name))
12966 	    as_bad (_("GOT already in symbol table"));
12967 	  GOT_symbol = symbol_new (name, undefined_section,
12968 				   (valueT) 0, &zero_address_frag);
12969 	};
12970       return GOT_symbol;
12971     }
12972   return 0;
12973 }
12974 
12975 /* Round up a section size to the appropriate boundary.  */
12976 
12977 valueT
12978 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
12979 {
12980 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
12981   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
12982     {
12983       /* For a.out, force the section size to be aligned.  If we don't do
12984 	 this, BFD will align it for us, but it will not write out the
12985 	 final bytes of the section.  This may be a bug in BFD, but it is
12986 	 easier to fix it here since that is how the other a.out targets
12987 	 work.  */
12988       int align;
12989 
12990       align = bfd_section_alignment (segment);
12991       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
12992     }
12993 #endif
12994 
12995   return size;
12996 }
12997 
12998 /* On the i386, PC-relative offsets are relative to the start of the
12999    next instruction.  That is, the address of the offset, plus its
13000    size, since the offset is always the last part of the insn.  */
13001 
13002 long
13003 md_pcrel_from (fixS *fixP)
13004 {
13005   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
13006 }
13007 
13008 #ifndef I386COFF
13009 
13010 static void
13011 s_bss (int ignore ATTRIBUTE_UNUSED)
13012 {
13013   int temp;
13014 
13015 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13016   if (IS_ELF)
13017     obj_elf_section_change_hook ();
13018 #endif
13019   temp = get_absolute_expression ();
13020   subseg_set (bss_section, (subsegT) temp);
13021   demand_empty_rest_of_line ();
13022 }
13023 
13024 #endif
13025 
13026 /* Remember constant directive.  */
13027 
13028 void
13029 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
13030 {
13031   if (last_insn.kind != last_insn_directive
13032       && (bfd_section_flags (now_seg) & SEC_CODE))
13033     {
13034       last_insn.seg = now_seg;
13035       last_insn.kind = last_insn_directive;
13036       last_insn.name = "constant directive";
13037       last_insn.file = as_where (&last_insn.line);
13038     }
13039 }
13040 
13041 void
13042 i386_validate_fix (fixS *fixp)
13043 {
13044   if (fixp->fx_subsy)
13045     {
13046       if (fixp->fx_subsy == GOT_symbol)
13047 	{
13048 	  if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
13049 	    {
13050 	      if (!object_64bit)
13051 		abort ();
13052 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13053 	      if (fixp->fx_tcbit2)
13054 		fixp->fx_r_type = (fixp->fx_tcbit
13055 				   ? BFD_RELOC_X86_64_REX_GOTPCRELX
13056 				   : BFD_RELOC_X86_64_GOTPCRELX);
13057 	      else
13058 #endif
13059 		fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
13060 	    }
13061 	  else
13062 	    {
13063 	      if (!object_64bit)
13064 		fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
13065 	      else
13066 		fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
13067 	    }
13068 	  fixp->fx_subsy = 0;
13069 	}
13070     }
13071 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13072   else if (!object_64bit)
13073     {
13074       if (fixp->fx_r_type == BFD_RELOC_386_GOT32
13075 	  && fixp->fx_tcbit2)
13076 	fixp->fx_r_type = BFD_RELOC_386_GOT32X;
13077     }
13078 #endif
13079 }
13080 
/* Build the BFD relocation for fixup FIXP.

   The relocation code is taken from fixp->fx_r_type for the types
   listed explicitly below; for every other type it is derived from
   fixp->fx_size and fixp->fx_pcrel.  A plain 32/64-bit code whose
   symbol is GOT_symbol is then replaced by the matching GOTPC code.

   The returned arelent and its sym_ptr_ptr are allocated with XNEW.
   In ELF configurations NULL is returned instead when a size
   relocation refers to a defined, non-external symbol: the value is
   then applied directly through md_apply_fix.

   NOTE(review): SECTION is tagged ATTRIBUTE_UNUSED, yet section->vma
   is read in the pc-relative default case of the rela path.  */

arelent *
tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
{
  arelent *rel;
  bfd_reloc_code_real_type code;

  switch (fixp->fx_r_type)
    {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    case BFD_RELOC_SIZE32:
    case BFD_RELOC_SIZE64:
      if (S_IS_DEFINED (fixp->fx_addsy)
	  && !S_IS_EXTERNAL (fixp->fx_addsy))
	{
	  /* Resolve size relocation against local symbol to size of
	     the symbol plus addend.  */
	  valueT value = S_GET_SIZE (fixp->fx_addsy) + fixp->fx_offset;
	  if (fixp->fx_r_type == BFD_RELOC_SIZE32
	      && !fits_in_unsigned_long (value))
	    as_bad_where (fixp->fx_file, fixp->fx_line,
			  _("symbol size computation overflow"));
	  fixp->fx_addsy = NULL;
	  fixp->fx_subsy = NULL;
	  md_apply_fix (fixp, (valueT *) &value, NULL);
	  return NULL;
	}
#endif
      /* Fall through.  */

    /* These types are passed to BFD unchanged.  */
    case BFD_RELOC_X86_64_PLT32:
    case BFD_RELOC_X86_64_GOT32:
    case BFD_RELOC_X86_64_GOTPCREL:
    case BFD_RELOC_X86_64_GOTPCRELX:
    case BFD_RELOC_X86_64_REX_GOTPCRELX:
    case BFD_RELOC_386_PLT32:
    case BFD_RELOC_386_GOT32:
    case BFD_RELOC_386_GOT32X:
    case BFD_RELOC_386_GOTOFF:
    case BFD_RELOC_386_GOTPC:
    case BFD_RELOC_386_TLS_GD:
    case BFD_RELOC_386_TLS_LDM:
    case BFD_RELOC_386_TLS_LDO_32:
    case BFD_RELOC_386_TLS_IE_32:
    case BFD_RELOC_386_TLS_IE:
    case BFD_RELOC_386_TLS_GOTIE:
    case BFD_RELOC_386_TLS_LE_32:
    case BFD_RELOC_386_TLS_LE:
    case BFD_RELOC_386_TLS_GOTDESC:
    case BFD_RELOC_386_TLS_DESC_CALL:
    case BFD_RELOC_X86_64_TLSGD:
    case BFD_RELOC_X86_64_TLSLD:
    case BFD_RELOC_X86_64_DTPOFF32:
    case BFD_RELOC_X86_64_DTPOFF64:
    case BFD_RELOC_X86_64_GOTTPOFF:
    case BFD_RELOC_X86_64_TPOFF32:
    case BFD_RELOC_X86_64_TPOFF64:
    case BFD_RELOC_X86_64_GOTOFF64:
    case BFD_RELOC_X86_64_GOTPC32:
    case BFD_RELOC_X86_64_GOT64:
    case BFD_RELOC_X86_64_GOTPCREL64:
    case BFD_RELOC_X86_64_GOTPC64:
    case BFD_RELOC_X86_64_GOTPLT64:
    case BFD_RELOC_X86_64_PLTOFF64:
    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
    case BFD_RELOC_X86_64_TLSDESC_CALL:
    case BFD_RELOC_RVA:
    case BFD_RELOC_VTABLE_ENTRY:
    case BFD_RELOC_VTABLE_INHERIT:
#ifdef TE_PE
    case BFD_RELOC_32_SECREL:
#endif
      code = fixp->fx_r_type;
      break;
    case BFD_RELOC_X86_64_32S:
      if (!fixp->fx_pcrel)
	{
	  /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
	  code = fixp->fx_r_type;
	  break;
	}
      /* Fall through.  */
    default:
      /* Pick a generic relocation from the field size; an unsupported
	 size is reported and treated as a 32-bit relocation.  */
      if (fixp->fx_pcrel)
	{
	  switch (fixp->fx_size)
	    {
	    default:
	      as_bad_where (fixp->fx_file, fixp->fx_line,
			    _("can not do %d byte pc-relative relocation"),
			    fixp->fx_size);
	      code = BFD_RELOC_32_PCREL;
	      break;
	    case 1: code = BFD_RELOC_8_PCREL;  break;
	    case 2: code = BFD_RELOC_16_PCREL; break;
	    case 4: code = BFD_RELOC_32_PCREL; break;
#ifdef BFD64
	    case 8: code = BFD_RELOC_64_PCREL; break;
#endif
	    }
	}
      else
	{
	  switch (fixp->fx_size)
	    {
	    default:
	      as_bad_where (fixp->fx_file, fixp->fx_line,
			    _("can not do %d byte relocation"),
			    fixp->fx_size);
	      code = BFD_RELOC_32;
	      break;
	    case 1: code = BFD_RELOC_8;  break;
	    case 2: code = BFD_RELOC_16; break;
	    case 4: code = BFD_RELOC_32; break;
#ifdef BFD64
	    case 8: code = BFD_RELOC_64; break;
#endif
	    }
	}
      break;
    }

  /* A 32-bit reference to the GOT symbol itself becomes a GOTPC
     relocation, chosen by object_64bit.  */
  if ((code == BFD_RELOC_32
       || code == BFD_RELOC_32_PCREL
       || code == BFD_RELOC_X86_64_32S)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      if (!object_64bit)
	code = BFD_RELOC_386_GOTPC;
      else
	code = BFD_RELOC_X86_64_GOTPC32;
    }
  /* Likewise for a 64-bit reference to the GOT symbol.  */
  if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      code = BFD_RELOC_X86_64_GOTPC64;
    }

  rel = XNEW (arelent);
  rel->sym_ptr_ptr = XNEW (asymbol *);
  *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);

  rel->address = fixp->fx_frag->fr_address + fixp->fx_where;

  if (!use_rela_relocations)
    {
      /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
	 vtable entry to be used in the relocation's section offset.  */
      /* NOTE(review): when OBJ_COFF and TE_PE are both defined, the
	 BFD_RELOC_VTABLE_ENTRY branch does not assign rel->addend;
	 without them the assignment below is unconditional.  Confirm
	 whether that combination can occur.  */
      if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
	rel->address = fixp->fx_offset;
#if defined (OBJ_COFF) && defined (TE_PE)
      else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
	rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
      else
#endif
      rel->addend = 0;
    }
  /* Use the rela in 64bit mode.  */
  else
    {
      /* With disallow_64bit_reloc set, the 64-bit relocation codes
	 below are reported as errors; processing still continues.  */
      if (disallow_64bit_reloc)
	switch (code)
	  {
	  case BFD_RELOC_X86_64_DTPOFF64:
	  case BFD_RELOC_X86_64_TPOFF64:
	  case BFD_RELOC_64_PCREL:
	  case BFD_RELOC_X86_64_GOTOFF64:
	  case BFD_RELOC_X86_64_GOT64:
	  case BFD_RELOC_X86_64_GOTPCREL64:
	  case BFD_RELOC_X86_64_GOTPC64:
	  case BFD_RELOC_X86_64_GOTPLT64:
	  case BFD_RELOC_X86_64_PLTOFF64:
	    as_bad_where (fixp->fx_file, fixp->fx_line,
			  _("cannot represent relocation type %s in x32 mode"),
			  bfd_get_reloc_code_name (code));
	    break;
	  default:
	    break;
	  }

      if (!fixp->fx_pcrel)
	rel->addend = fixp->fx_offset;
      else
	switch (code)
	  {
	  /* For these codes the addend is the fixup offset less the
	     size of the relocated field.  */
	  case BFD_RELOC_X86_64_PLT32:
	  case BFD_RELOC_X86_64_GOT32:
	  case BFD_RELOC_X86_64_GOTPCREL:
	  case BFD_RELOC_X86_64_GOTPCRELX:
	  case BFD_RELOC_X86_64_REX_GOTPCRELX:
	  case BFD_RELOC_X86_64_TLSGD:
	  case BFD_RELOC_X86_64_TLSLD:
	  case BFD_RELOC_X86_64_GOTTPOFF:
	  case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
	  case BFD_RELOC_X86_64_TLSDESC_CALL:
	    rel->addend = fixp->fx_offset - fixp->fx_size;
	    break;
	  default:
	    rel->addend = (section->vma
			   - fixp->fx_size
			   + fixp->fx_addnumber
			   + md_pcrel_from (fixp));
	    break;
	  }
    }

  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
		    _("cannot represent relocation type %s"),
		    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}
13301 
13302 #include "tc-i386-intel.c"
13303 
13304 void
13305 tc_x86_parse_to_dw2regnum (expressionS *exp)
13306 {
13307   int saved_naked_reg;
13308   char saved_register_dot;
13309 
13310   saved_naked_reg = allow_naked_reg;
13311   allow_naked_reg = 1;
13312   saved_register_dot = register_chars['.'];
13313   register_chars['.'] = '.';
13314   allow_pseudo_reg = 1;
13315   expression_and_evaluate (exp);
13316   allow_pseudo_reg = 0;
13317   register_chars['.'] = saved_register_dot;
13318   allow_naked_reg = saved_naked_reg;
13319 
13320   if (exp->X_op == O_register && exp->X_add_number >= 0)
13321     {
13322       if ((addressT) exp->X_add_number < i386_regtab_size)
13323 	{
13324 	  exp->X_op = O_constant;
13325 	  exp->X_add_number = i386_regtab[exp->X_add_number]
13326 			      .dw2_regnum[flag_code >> 1];
13327 	}
13328       else
13329 	exp->X_op = O_illegal;
13330     }
13331 }
13332 
13333 void
13334 tc_x86_frame_initial_instructions (void)
13335 {
13336   static unsigned int sp_regno[2];
13337 
13338   if (!sp_regno[flag_code >> 1])
13339     {
13340       char *saved_input = input_line_pointer;
13341       char sp[][4] = {"esp", "rsp"};
13342       expressionS exp;
13343 
13344       input_line_pointer = sp[flag_code >> 1];
13345       tc_x86_parse_to_dw2regnum (&exp);
13346       gas_assert (exp.X_op == O_constant);
13347       sp_regno[flag_code >> 1] = exp.X_add_number;
13348       input_line_pointer = saved_input;
13349     }
13350 
13351   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
13352   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
13353 }
13354 
13355 int
13356 x86_dwarf2_addr_size (void)
13357 {
13358 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
13359   if (x86_elf_abi == X86_64_X32_ABI)
13360     return 4;
13361 #endif
13362   return bfd_arch_bits_per_address (stdoutput) / 8;
13363 }
13364 
13365 int
13366 i386_elf_section_type (const char *str, size_t len)
13367 {
13368   if (flag_code == CODE_64BIT
13369       && len == sizeof ("unwind") - 1
13370       && strncmp (str, "unwind", 6) == 0)
13371     return SHT_X86_64_UNWIND;
13372 
13373   return -1;
13374 }
13375 
13376 #ifdef TE_SOLARIS
13377 void
13378 i386_solaris_fix_up_eh_frame (segT sec)
13379 {
13380   if (flag_code == CODE_64BIT)
13381     elf_section_type (sec) = SHT_X86_64_UNWIND;
13382 }
13383 #endif
13384 
13385 #ifdef TE_PE
13386 void
13387 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
13388 {
13389   expressionS exp;
13390 
13391   exp.X_op = O_secrel;
13392   exp.X_add_symbol = symbol;
13393   exp.X_add_number = 0;
13394   emit_expr (&exp, size);
13395 }
13396 #endif
13397 
13398 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13399 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
13400 
13401 bfd_vma
13402 x86_64_section_letter (int letter, const char **ptr_msg)
13403 {
13404   if (flag_code == CODE_64BIT)
13405     {
13406       if (letter == 'l')
13407 	return SHF_X86_64_LARGE;
13408 
13409       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
13410     }
13411   else
13412     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
13413   return -1;
13414 }
13415 
13416 bfd_vma
13417 x86_64_section_word (char *str, size_t len)
13418 {
13419   if (len == 5 && flag_code == CODE_64BIT && CONST_STRNEQ (str, "large"))
13420     return SHF_X86_64_LARGE;
13421 
13422   return -1;
13423 }
13424 
13425 static void
13426 handle_large_common (int small ATTRIBUTE_UNUSED)
13427 {
13428   if (flag_code != CODE_64BIT)
13429     {
13430       s_comm_internal (0, elf_common_parse);
13431       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
13432     }
13433   else
13434     {
13435       static segT lbss_section;
13436       asection *saved_com_section_ptr = elf_com_section_ptr;
13437       asection *saved_bss_section = bss_section;
13438 
13439       if (lbss_section == NULL)
13440 	{
13441 	  flagword applicable;
13442 	  segT seg = now_seg;
13443 	  subsegT subseg = now_subseg;
13444 
13445 	  /* The .lbss section is for local .largecomm symbols.  */
13446 	  lbss_section = subseg_new (".lbss", 0);
13447 	  applicable = bfd_applicable_section_flags (stdoutput);
13448 	  bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
13449 	  seg_info (lbss_section)->bss = 1;
13450 
13451 	  subseg_set (seg, subseg);
13452 	}
13453 
13454       elf_com_section_ptr = &_bfd_elf_large_com_section;
13455       bss_section = lbss_section;
13456 
13457       s_comm_internal (0, elf_common_parse);
13458 
13459       elf_com_section_ptr = saved_com_section_ptr;
13460       bss_section = saved_bss_section;
13461     }
13462 }
13463 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */
13464