1 /* ----------------------------------------------------------------------- *
2  *
3  *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
4  *   See the file AUTHORS included with the NASM distribution for
5  *   the specific copyright holders.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following
9  *   conditions are met:
10  *
11  *   * Redistributions of source code must retain the above copyright
12  *     notice, this list of conditions and the following disclaimer.
13  *   * Redistributions in binary form must reproduce the above
14  *     copyright notice, this list of conditions and the following
15  *     disclaimer in the documentation and/or other materials provided
16  *     with the distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
19  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
20  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21  *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23  *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
29  *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
30  *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  *
32  * ----------------------------------------------------------------------- */
33 
34 /*
35  * nasm.h   main header file for the Netwide Assembler: inter-module interface
36  */
37 
38 #ifndef NASM_NASM_H
39 #define NASM_NASM_H
40 
41 #include "compiler.h"
42 
43 #include <stdio.h>
44 #include <time.h>
45 
46 #include "nasmlib.h"
47 #include "strlist.h"
48 #include "preproc.h"
49 #include "insnsi.h"     /* For enum opcode */
50 #include "directiv.h"   /* For enum directive */
51 #include "labels.h"     /* For enum mangle_index, enum label_type */
52 #include "opflags.h"
53 #include "regs.h"
54 
55 /* Time stamp for the official start of compilation */
struct compile_time {
    time_t t;
    bool have_local;
    bool have_gm;
    bool have_posix;
    int64_t posix;
    struct tm local;
    struct tm gm;
};
extern struct compile_time official_compile_time;
64 
#define NO_SEG  INT32_C(-1)     /* null segment value */
#define SEG_ABS 0x40000000L     /* mask for far-absolute segments */

#define IDLEN_MAX 4096
#define DECOLEN_MAX 32
70 
/*
 * Name pollution problems: <time.h> on Digital UNIX pulls in some
 * strange hardware header file which sees fit to define R_SP. We
 * undefine it here so as not to break the enum below.  #undef is a
 * no-op when the name is not a macro, so no #ifdef guard is needed.
 */
#undef R_SP
79 
80 /*
81  * We must declare the existence of this structure type up here,
82  * since we have to reference it before we define it...
83  */
struct ofmt;
85 
86 /*
87  * Values for the `type' parameter to an output function.
88  */
enum out_type {
    OUT_RAWDATA,    /* Plain bytes */
    OUT_RESERVE,    /* Reserved bytes (RESB et al) */
    OUT_ZERODATA,   /* Initialized data, but all zero */
    OUT_ADDRESS,    /* An address (symbol value) */
    OUT_RELADDR,    /* A relative address */
    OUT_SEGMENT,    /* A segment number */

    /*
     * These values are used by the legacy backend interface only;
     * see output/legacy.c for more information.  These should never
     * be used otherwise.  Once all backends have been migrated to the
     * new interface they should be removed.
     */
    OUT_REL1ADR,
    OUT_REL2ADR,
    OUT_REL4ADR,
    OUT_REL8ADR
};
108 
/* Signedness of a value handed to the backend (see struct out_data) */
enum out_sign {
    OUT_WRAP,                   /* Undefined signedness (wraps) */
    OUT_SIGNED,                 /* Value is signed */
    OUT_UNSIGNED                /* Value is unsigned */
};
114 
115 /*
116  * The data we send down to the backend.
117  * XXX: We still want to push down the base address symbol if
118  * available, and replace the segment numbers with a structure.
119  */
120 struct out_data {
121     int64_t offset;             /* Offset within segment */
122     int32_t segment;            /* Segment written to */
123     enum out_type type;         /* See above */
124     enum out_sign sign;         /* See above */
125     int inslen;                 /* Length of instruction */
126     int insoffs;                /* Offset inside instruction */
127     int bits;                   /* Bits mode of compilation */
128     uint64_t size;              /* Size of output */
129     const struct itemplate *itemp; /* Instruction template */
130     const void *data;           /* Data for OUT_RAWDATA */
131     uint64_t toffset;           /* Target address offset for relocation */
132     int32_t tsegment;           /* Target segment for relocation */
133     int32_t twrt;               /* Relocation with respect to */
134     int64_t relbase;            /* Relative base for OUT_RELADDR */
135 };
136 
137 /*
138  * And a label-definition function. The boolean parameter
139  * `is_norm' states whether the label is a `normal' label (which
140  * should affect the local-label system), or something odder like
141  * an EQU or a segment-base symbol, which shouldn't.
142  */
typedef void (*ldfunc)(char *label, int32_t segment, int64_t offset,
                       char *special, bool is_norm);
145 
146 /*
147  * Token types returned by the scanner, in addition to ordinary
148  * ASCII character values, and zero for end-of-string.
149  */
enum token_type { /* token types, other than chars */
    TOKEN_INVALID = -1, /* a placeholder value */
    TOKEN_EOS = 0,      /* end of string */
    TOKEN_EQ = '=',
    TOKEN_GT = '>',
    TOKEN_LT = '<',     /* aliases */
    TOKEN_ID = 256,     /* identifier */
    TOKEN_NUM,          /* numeric constant */
    TOKEN_ERRNUM,       /* malformed numeric constant */
    TOKEN_STR,          /* string constant */
    TOKEN_ERRSTR,       /* unterminated string constant */
    TOKEN_FLOAT,        /* floating-point constant */
    TOKEN_REG,          /* register name */
    TOKEN_INSN,         /* instruction name */
    TOKEN_HERE,         /* $ */
    TOKEN_BASE,         /* $$ */
    TOKEN_SPECIAL,      /* BYTE, WORD, DWORD, QWORD, FAR, NEAR, etc */
    TOKEN_PREFIX,       /* A32, O16, LOCK, REPNZ, TIMES, etc */
    TOKEN_SHL,          /* << or <<< */
    TOKEN_SHR,          /* >> */
    TOKEN_SAR,          /* >>> */
    TOKEN_SDIV,         /* // */
    TOKEN_SMOD,         /* %% */
    TOKEN_GE,           /* >= */
    TOKEN_LE,           /* <= */
    TOKEN_NE,           /* <> (!= is same as <>) */
    TOKEN_DBL_AND,      /* && */
    TOKEN_DBL_OR,       /* || */
    TOKEN_DBL_XOR,      /* ^^ */
    TOKEN_SEG,          /* SEG */
    TOKEN_WRT,          /* WRT */
    TOKEN_FLOATIZE,     /* __floatX__ */
    TOKEN_STRFUNC,      /* __utf16*__, __utf32*__ */
    TOKEN_IFUNC,        /* __ilog2*__ */
    TOKEN_DECORATOR,    /* decorators such as {...} */
    TOKEN_OPMASK        /* translated token for opmask registers */
};
187 
/* Selector values carried with a TOKEN_FLOATIZE (__floatX__) token */
enum floatize {
    FLOAT_8,
    FLOAT_16,
    FLOAT_32,
    FLOAT_64,
    FLOAT_80M,
    FLOAT_80E,
    FLOAT_128L,
    FLOAT_128H
};
198 
199 /* Must match the list in string_transform(), in strfunc.c */
enum strfunc {
    STRFUNC_UTF16,
    STRFUNC_UTF16LE,
    STRFUNC_UTF16BE,
    STRFUNC_UTF32,
    STRFUNC_UTF32LE,
    STRFUNC_UTF32BE
};
208 
/* Selector values carried with a TOKEN_IFUNC (__ilog2*__) token */
enum ifunc {
    IFUNC_ILOG2E,
    IFUNC_ILOG2W,
    IFUNC_ILOG2F,
    IFUNC_ILOG2C
};
215 
/* String transformation selected by an enum strfunc value; see strfunc.c */
size_t string_transform(char *, size_t, char **, enum strfunc);
217 
218 /*
219  * The expression evaluator must be passed a scanner function; a
220  * standard scanner is provided as part of nasmlib.c. The
221  * preprocessor will use a different one. Scanners, and the
222  * token-value structures they return, look like this.
223  *
224  * The return value from the scanner is always a copy of the
225  * `t_type' field in the structure.
226  */
227 struct tokenval {
228     char                *t_charptr;
229     int64_t             t_integer;
230     int64_t             t_inttwo;
231     enum token_type     t_type;
232     int8_t              t_flag;
233 };
234 typedef int (*scanner)(void *private_data, struct tokenval *tv);
235 
/*
 * An (offset, segment) position plus a `known' flag.
 * NOTE(review): presumably the current assembly position -- confirm
 * against the definition of the `location' global.
 */
struct location {
    int64_t offset;
    int32_t segment;
    int     known;
};
extern struct location location;
242 
243 /*
244  * Expression-evaluator datatype. Expressions, within the
245  * evaluator, are stored as an array of these beasts, terminated by
246  * a record with type==0. Mostly, it's a vector type: each type
247  * denotes some kind of a component, and the value denotes the
248  * multiple of that component present in the expression. The
249  * exception is the WRT type, whose `value' field denotes the
250  * segment to which the expression is relative. These segments will
251  * be segment-base types, i.e. either odd segment values or SEG_ABS
252  * types. So it is still valid to assume that anything with a
253  * `value' field of zero is insignificant.
254  */
typedef struct {
    int32_t type;                  /* a register, or EXPR_xxx */
    int64_t value;                 /* must be >= 32 bits */
} expr;
259 
260 /*
261  * Library routines to manipulate expression data types.
262  */
263 bool is_reloc(const expr *vect);
264 bool is_simple(const expr *vect);
265 bool is_really_simple(const expr *vect);
266 bool is_unknown(const expr *vect);
267 bool is_just_unknown(const expr *vect);
268 int64_t reloc_value(const expr *vect);
269 int32_t reloc_seg(const expr *vect);
270 int32_t reloc_wrt(const expr *vect);
271 bool is_self_relative(const expr *vect);
272 void dump_expr(const expr *vect);
273 
274 /*
275  * The evaluator can also return hints about which of two registers
276  * used in an expression should be the base register. See also the
277  * `operand' structure.
278  */
struct eval_hints {
    int64_t base;
    int     type;
};
283 
284 /*
285  * The actual expression evaluator function looks like this. When
286  * called, it expects the first token of its expression to already
287  * be in `*tv'; if it is not, set tv->t_type to TOKEN_INVALID and
288  * it will start by calling the scanner.
289  *
290  * If a forward reference happens during evaluation, the evaluator
291  * must set `*fwref' to true if `fwref' is non-NULL.
292  *
293  * `critical' is non-zero if the expression may not contain forward
294  * references. The evaluator will report its own error if this
295  * occurs; if `critical' is 1, the error will be "symbol not
296  * defined before use", whereas if `critical' is 2, the error will
297  * be "symbol undefined".
298  *
299  * If `critical' has bit 8 set (in addition to its main value: 0x101
300  * and 0x102 correspond to 1 and 2) then an extended expression
301  * syntax is recognised, in which relational operators such as =, <
302  * and >= are accepted, as well as low-precedence logical operators
303  * &&, ^^ and ||.
304  *
305  * If `hints' is non-NULL, it gets filled in with some hints as to
306  * the base register in complex effective addresses.
307  */
308 #define CRITICAL 0x100
309 typedef expr *(*evalfunc)(scanner sc, void *scprivate,
310                           struct tokenval *tv, int *fwref, int critical,
311                           struct eval_hints *hints);
312 
313 /*
314  * Special values for expr->type.
315  * These come after EXPR_REG_END as defined in regs.h.
316  * Expr types : 0 ~ EXPR_REG_END, EXPR_UNKNOWN, EXPR_...., EXPR_RDSAE,
317  *              EXPR_SEGBASE ~ EXPR_SEGBASE + SEG_ABS, ...
318  */
#define EXPR_UNKNOWN    (EXPR_REG_END+1) /* forward references */
#define EXPR_SIMPLE     (EXPR_REG_END+2)
#define EXPR_WRT        (EXPR_REG_END+3)
#define EXPR_RDSAE      (EXPR_REG_END+4)
#define EXPR_SEGBASE    (EXPR_REG_END+5)
324 
325 /*
326  * preprocessors ought to look like this:
327  */
328 struct preproc_ops {
329     /*
330      * Called once at the very start of assembly.
331      */
332     void (*init)(void);
333 
334     /*
335      * Called at the start of a pass; given a file name, the number
336      * of the pass, an error reporting function, an evaluator
337      * function, and a listing generator to talk to.
338      */
339     void (*reset)(const char *file, int pass, StrList *deplist);
340 
341     /*
342      * Called to fetch a line of preprocessed source. The line
343      * returned has been malloc'ed, and so should be freed after
344      * use.
345      */
346     char *(*getline)(void);
347 
348     /* Called at the end of a pass */
349     void (*cleanup)(int pass);
350 
351     /* Additional macros specific to output format */
352     void (*extra_stdmac)(macros_t *macros);
353 
354     /* Early definitions and undefinitions for macros */
355     void (*pre_define)(char *definition);
356     void (*pre_undefine)(char *definition);
357 
358     /* Include file from command line */
359     void (*pre_include)(char *fname);
360 
361     /* Add a command from the command line */
362     void (*pre_command)(const char *what, char *str);
363 
364     /* Include path from command line */
365     void (*include_path)(const char *path);
366 
367     /* Unwind the macro stack when printing an error message */
368     void (*error_list_macros)(int severity);
369 };
370 
371 extern const struct preproc_ops nasmpp;
372 extern const struct preproc_ops preproc_nop;
373 
374 /* List of dependency files */
375 extern StrList *depend_list;
376 
377 /*
378  * Some lexical properties of the NASM source language, included
379  * here because they are shared between the parser and preprocessor.
380  */
381 
382 /*
383  * isidstart matches any character that may start an identifier, and isidchar
384  * matches any character that may appear at places other than the start of an
385  * identifier. E.g. a period may only appear at the start of an identifier
386  * (for local labels), whereas a number may appear anywhere *but* at the
387  * start.
388  * isbrcchar matches any character that may be placed inside curly braces as a
389  * decorator. E.g. {rn-sae}, {1to8}, {k1}{z}
390  */
391 
/*
 * NOTE: these are function-like macros which expand their argument
 * more than once.  The argument must therefore be free of side
 * effects: isidstart(*p++), for example, may advance p several times.
 */
#define isidstart(c) (nasm_isalpha(c)   ||  \
                      (c) == '_'        ||  \
                      (c) == '.'        ||  \
                      (c) == '?'        ||  \
                      (c) == '@')

#define isidchar(c) (isidstart(c)       ||  \
                     nasm_isdigit(c)    ||  \
                     (c) == '$'         ||  \
                     (c) == '#'         ||  \
                     (c) == '~')

#define isbrcchar(c) (isidchar(c)       ||  \
                      (c) == '-')

/* Ditto for numeric constants. */

#define isnumstart(c)  (nasm_isdigit(c) || (c) == '$')
#define isnumchar(c)   (nasm_isalnum(c) || (c) == '_')
411 
/*
 * inline function to skip past an identifier; returns a pointer to the
 * first character past the identifier if `str' starts with a valid
 * identifier, otherwise NULL.
 *
 * isidstart() and isidchar() are macros which evaluate their argument
 * more than once, so each character is fetched into a local first
 * instead of passing an expression with side effects such as *p++.
 */
static inline char *nasm_skip_identifier(const char *str)
{
    const char *p = str;
    char c = *p;

    if (!isidstart(c))
        return NULL;

    /* Stop ON the first non-identifier character, not one past it */
    do {
        c = *++p;
    } while (isidchar(c));

    return (char *)p;
}
428 
429 /*
430  * Data-type flags that get passed to listing-file routines.
431  */
enum {
    LIST_READ,
    LIST_MACRO,
    LIST_MACRO_NOLIST,
    LIST_INCLUDE,
    LIST_INCBIN,
    LIST_TIMES
};
440 
441 /*
442  * -----------------------------------------------------------
443  * Format of the `insn' structure returned from `parser.c' and
444  * passed into `assemble.c'
445  * -----------------------------------------------------------
446  */
447 
448 /* Verify value to be a valid register */
is_register(int reg)449 static inline bool is_register(int reg)
450 {
451     return reg >= EXPR_REG_START && reg < REG_ENUM_LIMIT;
452 }
453 
enum ccode { /* condition code names */
    C_A, C_AE, C_B, C_BE, C_C, C_E, C_G, C_GE, C_L, C_LE, C_NA, C_NAE,
    C_NB, C_NBE, C_NC, C_NE, C_NG, C_NGE, C_NL, C_NLE, C_NO, C_NP,
    C_NS, C_NZ, C_O, C_P, C_PE, C_PO, C_S, C_Z,
    C_none = -1
};

/*
 * token flags
 */
#define TFLAG_BRC       (1 << 0)    /* valid only with braces. {1to8}, {rd-sae}, ...*/
#define TFLAG_BRC_OPT   (1 << 1)    /* may or may not have braces. opmasks {k1} */
#define TFLAG_BRC_ANY   (TFLAG_BRC | TFLAG_BRC_OPT)
#define TFLAG_BRDCAST   (1 << 2)    /* broadcasting decorator */
#define TFLAG_WARN      (1 << 3)    /* warning only, treat as ID */

/*
 * Look up the 4-bit value associated with condition code `c'.
 * Synonymous condition names (e.g. C_E/C_Z, C_NE/C_NZ) share a value.
 * NOTE(review): presumably the condition field merged into the
 * conditional opcodes by the assembler -- confirm in assemble.c.
 *
 * `c' is expected to be one of C_A .. C_Z.  C_none (-1), or any other
 * value outside the table, returns 0 instead of reading out of bounds.
 */
static inline uint8_t get_cond_opcode(enum ccode c)
{
    /* One entry per enum ccode value, C_A through C_Z, in enum order */
    static const uint8_t ccode_opcodes[] = {
        0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xf, 0xd, 0xc, 0xe, 0x6, 0x2,
        0x3, 0x7, 0x3, 0x5, 0xe, 0xc, 0xd, 0xf, 0x1, 0xb, 0x9, 0x5,
        0x0, 0xa, 0xa, 0xb, 0x8, 0x4
    };

    /* The unsigned cast makes negative values fail the range check too */
    if ((unsigned int)c >= sizeof ccode_opcodes / sizeof ccode_opcodes[0])
        return 0;

    return ccode_opcodes[(int)c];
}
480 
481 /*
482  * REX flags
483  */
#define REX_MASK    0x4f    /* Actual REX prefix bits */
#define REX_B       0x01    /* ModRM r/m extension */
#define REX_X       0x02    /* SIB index extension */
#define REX_R       0x04    /* ModRM reg extension */
#define REX_W       0x08    /* 64-bit operand size */
#define REX_L       0x20    /* Use LOCK prefix instead of REX.R */
#define REX_P       0x40    /* REX prefix present/required */
#define REX_H       0x80    /* High register present, REX forbidden */
#define REX_V       0x0100  /* Instruction uses VEX/XOP instead of REX */
#define REX_NH      0x0200  /* Instruction which doesn't use high regs */
#define REX_EV      0x0400  /* Instruction uses EVEX instead of REX */
495 
496 /*
497  * EVEX bit field
498  */
#define EVEX_P0MM       0x0f        /* EVEX P[3:0] : Opcode map           */
#define EVEX_P0RP       0x10        /* EVEX P[4] : High-16 reg            */
#define EVEX_P0X        0x40        /* EVEX P[6] : High-16 rm             */
#define EVEX_P1PP       0x03        /* EVEX P[9:8] : Legacy prefix        */
#define EVEX_P1VVVV     0x78        /* EVEX P[14:11] : NDS register       */
#define EVEX_P1W        0x80        /* EVEX P[15] : Osize extension       */
#define EVEX_P2AAA      0x07        /* EVEX P[18:16] : Embedded opmask    */
#define EVEX_P2VP       0x08        /* EVEX P[19] : High-16 NDS reg       */
#define EVEX_P2B        0x10        /* EVEX P[20] : Broadcast / RC / SAE  */
#define EVEX_P2LL       0x60        /* EVEX P[22:21] : Vector length      */
#define EVEX_P2RC       EVEX_P2LL   /* EVEX P[22:21] : Rounding control   */
#define EVEX_P2Z        0x80        /* EVEX P[23] : Zeroing/Merging       */
511 
512 /*
513  * REX_V "classes" (prefixes which behave like VEX)
514  */
enum vex_class {
    RV_VEX      = 0,    /* C4/C5 */
    RV_XOP      = 1,    /* 8F */
    RV_EVEX     = 2     /* 62 */
};
520 
521 /*
522  * Note that because segment registers may be used as instruction
523  * prefixes, we must ensure the enumerations for prefixes and
524  * register names do not overlap.
525  */
526 enum prefixes { /* instruction prefixes */
527     P_none = 0,
528     PREFIX_ENUM_START = REG_ENUM_LIMIT,
529     P_A16 = PREFIX_ENUM_START,
530     P_A32,
531     P_A64,
532     P_ASP,
533     P_LOCK,
534     P_O16,
535     P_O32,
536     P_O64,
537     P_OSP,
538     P_REP,
539     P_REPE,
540     P_REPNE,
541     P_REPNZ,
542     P_REPZ,
543     P_TIMES,
544     P_WAIT,
545     P_XACQUIRE,
546     P_XRELEASE,
547     P_BND,
548     P_NOBND,
549     P_EVEX,
550     P_VEX3,
551     P_VEX2,
552     PREFIX_ENUM_LIMIT
553 };
554 
enum extop_type { /* extended operand types */
    EOT_NOTHING,
    EOT_DB_STRING,      /* Byte string */
    EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd */
    EOT_DB_NUMBER       /* Integer */
};
561 
enum ea_flags { /* special EA flags; each is a distinct bit */
    EAF_BYTEOFFS    =  1,   /* force offset part to byte size */
    EAF_WORDOFFS    =  2,   /* force offset part to [d]word size */
    EAF_TIMESTWO    =  4,   /* really do EAX*2 not EAX+EAX */
    EAF_REL         =  8,   /* IP-relative addressing */
    EAF_ABS         = 16,   /* non-IP-relative addressing */
    EAF_FSGS        = 32,   /* fs/gs segment override present */
    EAF_MIB         = 64    /* mib operand */
};
571 
enum eval_hint { /* values for `hinttype' */
    EAH_NOHINT   = 0,       /* no hint at all - our discretion */
    EAH_MAKEBASE = 1,       /* try to make given reg the base */
    EAH_NOTBASE  = 2,       /* try _not_ to make reg the base */
    EAH_SUMMED   = 3        /* base and index are summed into index */
};
578 
579 typedef struct operand { /* operand to an instruction */
580     opflags_t       type;       /* type of operand */
581     int             disp_size;  /* 0 means default; 16; 32; 64 */
582     enum reg_enum   basereg;
583     enum reg_enum   indexreg;   /* address registers */
584     int             scale;      /* index scale */
585     int             hintbase;
586     enum eval_hint  hinttype;   /* hint as to real base register */
587     int32_t         segment;    /* immediate segment, if needed */
588     int64_t         offset;     /* any immediate number */
589     int32_t         wrt;        /* segment base it's relative to */
590     int             eaflags;    /* special EA flags */
591     int             opflags;    /* see OPFLAG_* defines below */
592     decoflags_t     decoflags;  /* decorator flags such as {...} */
593 } operand;
594 
#define OPFLAG_FORWARD      1   /* operand is a forward reference */
#define OPFLAG_EXTERN       2   /* operand is an external reference */
#define OPFLAG_UNKNOWN      4   /* operand is an unknown reference
                                   (always a forward reference also) */
#define OPFLAG_RELATIVE     8   /* operand is self-relative, e.g. [foo - $]
                                   where foo is not in the current segment */
601 
602 typedef struct extop { /* extended operand */
603     struct extop    *next;      /* linked list */
604     char            *stringval; /* if it's a string, then here it is */
605     size_t          stringlen;  /* ... and here's how long it is */
606     int64_t         offset;     /* ... it's given here ... */
607     int32_t         segment;    /* if it's a number/address, then... */
608     int32_t         wrt;        /* ... and here */
609     bool            relative;   /* self-relative expression */
610     enum extop_type type;       /* defined above */
611 } extop;
612 
/* Kinds of effective address (EA) */
enum ea_type {
    EA_INVALID,     /* Not a valid EA at all */
    EA_SCALAR,      /* Scalar EA */
    EA_XMMVSIB,     /* XMM vector EA */
    EA_YMMVSIB,     /* YMM vector EA */
    EA_ZMMVSIB      /* ZMM vector EA */
};
620 
621 /*
622  * Prefix positions: each type of prefix goes in a specific slot.
623  * This affects the final ordering of the assembled output, which
624  * shouldn't matter to the processor, but if you have stylistic
625  * preferences, you can change this.  REX prefixes are handled
626  * differently for the time being.
627  *
628  * LOCK and REP used to be one slot; this is no longer the case since
629  * the introduction of HLE.
630  */
enum prefix_pos {
    PPS_WAIT,   /* WAIT (technically not a prefix!) */
    PPS_REP,    /* REP/HLE prefix */
    PPS_LOCK,   /* LOCK prefix */
    PPS_SEG,    /* Segment override prefix */
    PPS_OSIZE,  /* Operand size prefix */
    PPS_ASIZE,  /* Address size prefix */
    PPS_VEX,    /* VEX type */
    MAXPREFIX   /* Total number of prefix slots */
};
641 
642 /*
643  * Tuple types that are used when determining Disp8*N eligibility
644  * The order must match with a hash %tuple_codes in insns.pl
645  */
enum ttypes {       /* values are written in octal */
    FV    = 001,
    HV    = 002,
    FVM   = 003,
    T1S8  = 004,
    T1S16 = 005,
    T1S   = 006,
    T1F32 = 007,
    T1F64 = 010,
    T2    = 011,
    T4    = 012,
    T8    = 013,
    HVM   = 014,
    QVM   = 015,
    OVM   = 016,
    M128  = 017,
    DUP   = 020
};
664 
665 /* EVEX.L'L : Vector length on vector insns */
enum vectlens {
    VL128 = 0,
    VL256 = 1,
    VL512 = 2,
    VLMAX = 3
};
672 
673 /* If you need to change this, also change it in insns.pl */
#define MAX_OPERANDS 5
675 
676 typedef struct insn { /* an instruction itself */
677     char            *label;                 /* the label defined, or NULL */
678     int             prefixes[MAXPREFIX];    /* instruction prefixes, if any */
679     enum opcode     opcode;                 /* the opcode - not just the string */
680     enum ccode      condition;              /* the condition code, if Jcc/SETcc */
681     int             operands;               /* how many operands? 0-3 (more if db et al) */
682     int             addr_size;              /* address size */
683     operand         oprs[MAX_OPERANDS];     /* the operands, defined as above */
684     extop           *eops;                  /* extended operands */
685     int             eops_float;             /* true if DD and floating */
686     int32_t         times;                  /* repeat count (TIMES prefix) */
687     bool            forw_ref;               /* is there a forward reference? */
688     bool            rex_done;               /* REX prefix emitted? */
689     int             rex;                    /* Special REX Prefix */
690     int             vexreg;                 /* Register encoded in VEX prefix */
691     int             vex_cm;                 /* Class and M field for VEX prefix */
692     int             vex_wlp;                /* W, P and L information for VEX prefix */
693     uint8_t         evex_p[3];              /* EVEX.P0: [RXB,R',00,mm], P1: [W,vvvv,1,pp] */
694                                             /* EVEX.P2: [z,L'L,b,V',aaa] */
695     enum ttypes     evex_tuple;             /* Tuple type for compressed Disp8*N */
696     int             evex_rm;                /* static rounding mode for AVX512 (EVEX) */
697     int8_t          evex_brerop;            /* BR/ER/SAE operand position */
698 } insn;
699 
700 /* Instruction flags type: IF_* flags are defined in insns.h */
typedef uint64_t iflags_t;
702 
703 /*
704  * What to return from a directive- or pragma-handling function.
705  * Currently DIRR_OK and DIRR_ERROR are treated the same way;
706  * in both cases the backend is expected to produce the appropriate
707  * error message on its own.
708  *
709  * DIRR_BADPARAM causes a generic error message to be printed.  Note
710  * that it is an error, not a warning, even in the case of pragmas;
711  * don't use it where forward compatibility would be compromised
712  * (instead consider adding a DIRR_WARNPARAM.)
713  */
enum directive_result {
    DIRR_UNKNOWN,               /* Directive not handled by backend */
    DIRR_OK,                    /* Directive processed */
    DIRR_ERROR,                 /* Directive processed unsuccessfully */
    DIRR_BADPARAM               /* Print bad argument error message */
};
720 
721 /*
722  * A pragma facility: this structure is used to request passing a
723  * parsed pragma directive for a specific facility.  If the handler is
724  * NULL then this pragma facility is recognized but ignored; pragma
725  * processing stops at that point.
726  *
727  * Note that the handler is passed a pointer to the facility structure
728  * as part of the struct pragma.
729  */
struct pragma;
typedef enum directive_result (*pragma_handler)(const struct pragma *);

struct pragma_facility {
    const char *name;
    pragma_handler handler;     /* NULL: recognized but ignored */
};
737 
738 /*
739  * This structure defines how a pragma directive is passed to a
740  * facility.  This structure may be augmented in the future.
741  *
742  * Any facility MAY, but is not required to, add its operations
743  * keywords or a subset thereof into asm/directiv.dat, in which case
744  * the "opcode" field will be set to the corresponding D_ constant
745  * from directiv.h; otherwise it will be D_unknown.
746  */
747 struct pragma {
748     const struct pragma_facility *facility;
749     const char *facility_name;  /* Facility name exactly as entered by user */
750     const char *opname;         /* First word after the facility name */
751     const char *tail;           /* Anything after the operation */
752     enum directive opcode;     /* Operation as a D_ directives constant */
753 };
754 
755 /*
756  * These are semi-arbitrary limits to keep the assembler from going
757  * into a black hole on certain kinds of bugs.  They can be overridden
758  * by command-line options or %pragma.
759  */
enum nasm_limit {
    LIMIT_PASSES,
    LIMIT_STALLED,
    LIMIT_MACROS,
    LIMIT_REP,
    LIMIT_EVAL,
    LIMIT_LINES
};
#define LIMIT_MAX LIMIT_LINES   /* last enumerator; sizes the array below */
extern int64_t nasm_limit[LIMIT_MAX+1];
extern enum directive_result nasm_set_limit(const char *, const char *);
771 
772 /*
773  * The data structure defining an output format driver, and the
774  * interfaces to the functions therein.
775  */
776 struct ofmt {
777     /*
778      * This is a short (one-liner) description of the type of
779      * output generated by the driver.
780      */
781     const char *fullname;
782 
783     /*
784      * This is a single keyword used to select the driver.
785      */
786     const char *shortname;
787 
788     /*
789      * Default output filename extension, or a null string
790      */
791     const char *extension;
792 
793     /*
794      * Output format flags.
795      */
796 #define OFMT_TEXT		1	/* Text file format */
797 #define OFMT_KEEP_ADDR	2	/* Keep addr; no conversion to data */
798 
799     unsigned int flags;
800 
801     int maxbits;                /* Maximum segment bits supported */
802 
803     /*
804      * this is a pointer to the first element of the debug information
805      */
806     const struct dfmt * const *debug_formats;
807 
808     /*
809      * the default debugging format if -F is not specified
810      */
811     const struct dfmt *default_dfmt;
812 
813     /*
814      * This, if non-NULL, is a NULL-terminated list of `char *'s
815      * pointing to extra standard macros supplied by the object
816      * format (e.g. a sensible initial default value of __SECT__,
817      * and user-level equivalents for any format-specific
818      * directives).
819      */
820     macros_t *stdmac;
821 
822     /*
823      * This procedure is called at the start of an output session to set
824      * up internal parameters.
825      */
826     void (*init)(void);
827 
828     /*
829      * This procedure is called at the start of each pass.
830      */
831     void (*reset)(void);
832 
833     /*
834      * This is the modern output function, which gets passed
835      * a struct out_data with much more information.  See the
836      * definition of struct out_data.
837      */
838     void (*output)(const struct out_data *data);
839 
840     /*
841      * This procedure is called by assemble() to write actual
842      * generated code or data to the object file. Typically it
843      * doesn't have to actually _write_ it, just store it for
844      * later.
845      *
846      * The `type' argument specifies the type of output data, and
847      * usually the size as well: its contents are described below.
848      *
849      * This is used for backends which have not yet been ported to
850      * the new interface, and should be NULL on ported backends.
851      * To use this entry point, set the output pointer to
852      * nasm_do_legacy_output.
853      */
854     void (*legacy_output)(int32_t segto, const void *data,
855                           enum out_type type, uint64_t size,
856                           int32_t segment, int32_t wrt);
857 
858     /*
859      * This procedure is called once for every symbol defined in
860      * the module being assembled. It gives the name and value of
861      * the symbol, in NASM's terms, and indicates whether it has
862      * been declared to be global. Note that the parameter "name",
863      * when passed, will point to a piece of static storage
864      * allocated inside the label manager - it's safe to keep using
865      * that pointer, because the label manager doesn't clean up
866      * until after the output driver has.
867      *
868      * Values of `is_global' are: 0 means the symbol is local; 1
869      * means the symbol is global; 2 means the symbol is common (in
870      * which case `offset' holds the _size_ of the variable).
871      * Anything else is available for the output driver to use
872      * internally.
873      *
874      * This routine explicitly _is_ allowed to call the label
875      * manager to define further symbols, if it wants to, even
876      * though it's been called _from_ the label manager. That much
877      * re-entrancy is guaranteed in the label manager. However, the
878      * label manager will in turn call this routine, so it should
879      * be prepared to be re-entrant itself.
880      *
881      * The `special' parameter contains special information passed
882      * through from the command that defined the label: it may have
883      * been an EXTERN, a COMMON or a GLOBAL. The distinction should
884      * be obvious to the output format from the other parameters.
885      */
886     void (*symdef)(char *name, int32_t segment, int64_t offset,
887                    int is_global, char *special);
888 
889     /*
890      * This procedure is called when the source code requests a
891      * segment change. It should return the corresponding segment
892      * _number_ for the name, or NO_SEG if the name is not a valid
893      * segment name.
894      *
895      * It may also be called with NULL, in which case it is to
896      * return the _default_ section number for starting assembly in.
897      *
898      * It is allowed to modify the string it is given a pointer to.
899      *
900      * It is also allowed to specify a default instruction size for
901      * the segment, by setting `*bits' to 16 or 32. Or, if it
902      * doesn't wish to define a default, it can leave `bits' alone.
903      */
904     int32_t (*section)(char *name, int pass, int *bits);
905 
906     /*
907      * This function is called when a label is defined
908      * in the source code. It is allowed to change the section
909      * number as a result, but not the bits value.
910      * This is *only* called if the symbol defined is at the
911      * current offset, i.e. "foo:" or "foo equ $".
912      * The offset isn't passed; and may not be stable at this point.
913      * The subsection number is a field available for use by the
914      * backend. It is initialized to NO_SEG.
915      *
916      * If "copyoffset" is set by the backend then the offset is
917      * copied from the previous segment, otherwise the new segment
918      * is treated as a new segment the normal way.
919      */
920     int32_t (*herelabel)(const char *name, enum label_type type,
921                          int32_t seg, int32_t *subsection,
922                          bool *copyoffset);
923 
924     /*
925      * This procedure is called to modify section alignment,
926      * note there is a trick, the alignment can only increase
927      */
928     void (*sectalign)(int32_t seg, unsigned int value);
929 
930     /*
931      * This procedure is called to modify the segment base values
932      * returned from the SEG operator. It is given a segment base
933      * value (i.e. a segment value with the low bit set), and is
934      * required to produce in return a segment value which may be
935      * different. It can map segment bases to absolute numbers by
936      * means of returning SEG_ABS types.
937      *
938      * It should return NO_SEG if the segment base cannot be
939      * determined; the evaluator (which calls this routine) is
940      * responsible for throwing an error condition if that occurs
941      * in pass two or in a critical expression.
942      */
943     int32_t (*segbase)(int32_t segment);
944 
945     /*
946      * This procedure is called to allow the output driver to
947      * process its own specific directives. When called, it has the
948      * directive word in `directive' and the parameter string in
949      * `value'. It is called in both assembly passes, and `pass'
950      * will be either 1 or 2.
951      *
952      * The following values are (currently) possible for
953      * directive_result:
954      *
955      * 0 - DIRR_UNKNOWN		- directive not recognized by backend
956      * 1 - DIRR_OK		- directive processed ok
957      * 2 - DIRR_ERROR		- backend printed its own error message
958      * 3 - DIRR_BADPARAM	- print the generic message
959      *				  "invalid parameter to [*] directive"
960      */
961     enum directive_result
962     (*directive)(enum directive directive, char *value, int pass);
963 
964     /*
965      * This procedure is called after assembly finishes, to allow
966      * the output driver to clean itself up and free its memory.
967      * Typically, it will also be the point at which the object
968      * file actually gets _written_.
969      *
970      * One thing the cleanup routine should always do is to close
971      * the output file pointer.
972      */
973     void (*cleanup)(void);
974 
975     /*
976      * List of pragma facility names that apply to this backend.
977      */
978     const struct pragma_facility *pragmas;
979 };
980 
/*
 * struct ofmt_alias -- an alias for an output format driver: its own
 * short and full names, plus a pointer to the struct ofmt it refers to.
 */
struct ofmt_alias {
    const char *shortname;      /* short name of the alias */
    const char *fullname;       /* full name of the alias */
    const struct ofmt *ofmt;    /* the output format driver aliased */
};
989 
/* NOTE(review): presumably the output format driver in use -- confirm. */
extern const struct ofmt *ofmt;

/* NOTE(review): presumably the output file stream -- confirm. */
extern FILE *ofile;
992 
/*
 * ------------------------------------------------------------
 * struct dfmt -- descriptor for a debug format driver: its
 * identifying strings and the callbacks used to invoke it.
 * ------------------------------------------------------------
 */

struct dfmt {
    /* Short (one-line) description of the kind of output generated. */
    const char *fullname;

    /* The single keyword used to select this driver. */
    const char *shortname;

    /* init: called initially to set up local pointer to object format. */
    void (*init)(void);

    /*
     * linenum: called any time there is output with a change of
     * line number or file.
     */
    void (*linenum)(const char *filename, int32_t linenumber, int32_t segto);

    /*
     * debug_deflabel: called whenever a label is defined.  The
     * parameters are the same as for 'symdef()' in the output format.
     * This function is called after the output format version.
     */
    void (*debug_deflabel)(char *name, int32_t segment, int64_t offset,
                           int is_global, char *special);

    /*
     * debug_directive: called whenever a DEBUG directive other than
     * 'LINE' is encountered.  'directive' holds the first parameter
     * to the DEBUG directive and 'params' holds the rest.  For
     * example, 'DEBUG VAR _somevar:int' results in a call with
     * 'directive' equal to "VAR" and 'params' equal to "_somevar:int".
     */
    void (*debug_directive)(const char *directive, const char *params);

    /*
     * debug_typevalue: called whenever the assembler wishes to
     * register a type for the last defined label.  This routine MUST
     * detect if a type was already registered and not re-register it.
     */
    void (*debug_typevalue)(int32_t type);

    /*
     * debug_output: called whenever output is required.  'type' is
     * the type of info required, and this is format-specific.
     */
    void (*debug_output)(int type, void *param);

    /* cleanup: called after processing of the file is complete. */
    void (*cleanup)(void);

    /* List of pragma facility names that apply to this backend. */
    const struct pragma_facility *pragmas;
};
1064 
/* NOTE(review): presumably the debug format driver in use -- confirm. */
extern const struct dfmt *dfmt;
1066 
/*
 * Type definition macros, for debugging.
 *
 * Layout of a type value:
 *   low 3 bits    reserved
 *   next 5 bits   type (one of the TY_* codes)
 *   next 24 bits  number of elements for arrays (0 for labels)
 */

/* Type codes; each one occupies the 5-bit type field. */
#define TY_UNKNOWN 0x00
#define TY_LABEL   0x08
#define TY_BYTE    0x10
#define TY_WORD    0x18
#define TY_DWORD   0x20
#define TY_FLOAT   0x28
#define TY_QWORD   0x30
#define TY_TBYTE   0x38
#define TY_OWORD   0x40
#define TY_YWORD   0x48
#define TY_ZWORD   0x50
#define TY_COMMON  0xE0
#define TY_SEG     0xE8
#define TY_EXTERN  0xF0
#define TY_EQU     0xF8

/* Extract the type field from a type value. */
#define TYM_TYPE(x)     ((x) & 0xF8)
/* Extract the number-of-elements field from a type value. */
#define TYM_ELEMENTS(x) (((x) & 0xFFFFFF00) >> 8)

/* Move an element count into the number-of-elements field. */
#define TYS_ELEMENTS(x) ((x) << 8)
1096 
1097 enum special_tokens {
1098     SPECIAL_ENUM_START  = PREFIX_ENUM_LIMIT,
1099     S_ABS               = SPECIAL_ENUM_START,
1100     S_BYTE,
1101     S_DWORD,
1102     S_FAR,
1103     S_LONG,
1104     S_NEAR,
1105     S_NOSPLIT,
1106     S_OWORD,
1107     S_QWORD,
1108     S_REL,
1109     S_SHORT,
1110     S_STRICT,
1111     S_TO,
1112     S_TWORD,
1113     S_WORD,
1114     S_YWORD,
1115     S_ZWORD,
1116     SPECIAL_ENUM_LIMIT
1117 };
1118 
1119 enum decorator_tokens {
1120     DECORATOR_ENUM_START    = SPECIAL_ENUM_LIMIT,
1121     BRC_1TO2                = DECORATOR_ENUM_START,
1122     BRC_1TO4,
1123     BRC_1TO8,
1124     BRC_1TO16,
1125     BRC_RN,
1126     BRC_RD,
1127     BRC_RU,
1128     BRC_RZ,
1129     BRC_SAE,
1130     BRC_Z,
1131     DECORATOR_ENUM_LIMIT
1132 };
1133 
/*
 * AVX512 Decorator (decoflags_t) bit assignments, counted from bit 0:
 *
 *   bits  0 -  3   opmask
 *   bit   4        zeroing / merging
 *   bit   5        broadcast
 *   bit   6        static rounding
 *   bit   7        SAE
 *   bits  8 -  9   broadcast element size
 *   bits 10 - 11   number of broadcast elements
 *
 * OP_GENMASK() and OP_GENBIT(), used by the definitions that follow,
 * are not defined in this section of the header.
 */

/* Keep the low `bits' bits of `val' and shift them left by `shift'. */
#define OP_GENVAL(val, bits, shift)     (((val) & ((UINT64_C(1) << (bits)) - 1)) << (shift))

/*
 * Opmask register number; identical to EVEX.aaa.
 * Occupies bits 0 - 3.
 */
#define OPMASK_SHIFT            (0)
#define OPMASK_BITS             (4)
#define OPMASK_MASK             OP_GENMASK(OPMASK_BITS, OPMASK_SHIFT)
#define GEN_OPMASK(bit)         OP_GENBIT(bit, OPMASK_SHIFT)
#define VAL_OPMASK(val)         OP_GENVAL(val, OPMASK_BITS, OPMASK_SHIFT)

/*
 * Zeroing / merging control available; matches EVEX.z.
 * Occupies bit 4.
 */
#define Z_SHIFT                 (4)
#define Z_BITS                  (1)
#define Z_MASK                  OP_GENMASK(Z_BITS, Z_SHIFT)
#define GEN_Z(bit)              OP_GENBIT(bit, Z_SHIFT)

/*
 * Broadcast: whether this operand can be broadcasted.
 * Occupies bit 5.
 */
#define BRDCAST_SHIFT           (5)
#define BRDCAST_BITS            (1)
#define BRDCAST_MASK            OP_GENMASK(BRDCAST_BITS, BRDCAST_SHIFT)
#define GEN_BRDCAST(bit)        OP_GENBIT(bit, BRDCAST_SHIFT)

/*
 * Static rounding: whether this instruction can have a static
 * rounding mode.  The flag goes with the last simd operand because
 * the static rounding mode decorator is located between the last
 * simd operand and imm8 (if any).
 * Occupies bit 6.
 */
#define STATICRND_SHIFT         (6)
#define STATICRND_BITS          (1)
#define STATICRND_MASK          OP_GENMASK(STATICRND_BITS, STATICRND_SHIFT)
#define GEN_STATICRND(bit)      OP_GENBIT(bit, STATICRND_SHIFT)

/*
 * SAE (Suppress All Exceptions) available.
 * Occupies bit 7.
 */
#define SAE_SHIFT               (7)
#define SAE_BITS                (1)
#define SAE_MASK                OP_GENMASK(SAE_BITS, SAE_SHIFT)
#define GEN_SAE(bit)            OP_GENBIT(bit, SAE_SHIFT)

/*
 * Broadcasting element size.
 * Occupies bits 8 - 9.
 */
#define BRSIZE_SHIFT            (8)
#define BRSIZE_BITS             (2)
#define BRSIZE_MASK             OP_GENMASK(BRSIZE_BITS, BRSIZE_SHIFT)
#define GEN_BRSIZE(bit)         OP_GENBIT(bit, BRSIZE_SHIFT)

#define BR_BITS32               GEN_BRSIZE(0)
#define BR_BITS64               GEN_BRSIZE(1)

/*
 * Number of broadcasting elements.
 * Occupies bits 10 - 11.
 */
#define BRNUM_SHIFT             (10)
#define BRNUM_BITS              (2)
#define BRNUM_MASK              OP_GENMASK(BRNUM_BITS, BRNUM_SHIFT)
#define VAL_BRNUM(val)          OP_GENVAL(val, BRNUM_BITS, BRNUM_SHIFT)

#define BR_1TO2                 VAL_BRNUM(0)
#define BR_1TO4                 VAL_BRNUM(1)
#define BR_1TO8                 VAL_BRNUM(2)
#define BR_1TO16                VAL_BRNUM(3)

/* Short names for the decorator flags. */
#define MASK                    OPMASK_MASK             /* Opmask (k1 ~ 7) can be used */
#define Z                       Z_MASK
#define B32                     (BRDCAST_MASK|BR_BITS32) /* {1to16} : broadcast 32b * 16 to zmm(512b) */
#define B64                     (BRDCAST_MASK|BR_BITS64) /* {1to8}  : broadcast 64b *  8 to zmm(512b) */
#define ER                      STATICRND_MASK          /* ER(Embedded Rounding) == Static rounding mode */
#define SAE                     SAE_MASK                /* SAE(Suppress All Exception) */
1239 
1240 /*
1241  * Global modes
1242  */
1243 
/*
 * The "pass0" variable (declared below) passes the "pass" number to
 * all other modules.  It assumes the values: 0, 0, ..., 0, 1, 2
 * where 0 = optimizing pass
 *       1 = pass 1
 *       2 = pass 2
 */
1251 
/*
 * Flags for disabling optimizations selectively; useful for turning
 * off certain optimizations.
 */
enum optimization_disable_flag {
    OPTIM_ALL_ENABLED = 0,
    OPTIM_DISABLE_JMP_MATCH = 1
};
1260 
/* Optimization settings: a level plus a flag value. */
struct optimization {
    int level;
    /* NOTE(review): presumably enum optimization_disable_flag values -- confirm. */
    int flag;
};
1265 
/*
 * pass0 passes the "pass" number to all other modules.  It assumes
 * the values 0, 0, ..., 0, 1, 2, where 0 = optimizing pass,
 * 1 = pass 1 and 2 = pass 2.
 */
extern int pass0;
extern int64_t passn;           /* Actual pass number */

extern bool tasm_compatible_mode;
extern struct optimization optimizing;
extern int globalbits;          /* 16, 32 or 64-bit mode */
extern int globalrel;           /* default to relative addressing? */
extern int globalbnd;           /* default to using bnd prefix? */

extern const char *inname;      /* primary input filename */
extern const char *outname;     /* output filename */
1277 
/*
 * switch_segment: switch to a different segment.
 *
 * segment - the segment to switch to
 *
 * Returns the current offset.
 */
int64_t switch_segment(int32_t segment);
1282 
1283 #endif
1284