1 /****************************************************************************
2 *
3 *                            Open Watcom Project
4 *
5 *    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
6 *
7 *  ========================================================================
8 *
9 *    This file contains Original Code and/or Modifications of Original
10 *    Code as defined in and that are subject to the Sybase Open Watcom
11 *    Public License version 1.0 (the 'License'). You may not use this file
12 *    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
13 *    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
14 *    provided with the Original Code and Modifications, and is also
15 *    available at www.sybase.com/developer/opensource.
16 *
17 *    The Original Code and all software distributed under the License are
18 *    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 *    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
20 *    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
21 *    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
22 *    NON-INFRINGEMENT. Please see the License for the specific language
23 *    governing rights and limitations under the License.
24 *
25 *  ========================================================================
26 *
27 * Description:  Parser items
28 *
29 ****************************************************************************/
30 
31 #ifndef PARSER_H
32 #define PARSER_H
33 
34 #include "operands.h"
35 #include "symbols.h"
36 #include "token.h"
37 
38 /* define tokens for SpecialTable (registers, operators, ... ) */
/* Token ids for the reserved words stored in SpecialTable.
 * The enumerators are generated with the X-macro technique: res() is
 * temporarily defined to expand to "T_<token> ," and the table headers are
 * included inside the enum body, so every res() invocation in special.h and
 * directve.h yields one enumerator, in header order.
 */
enum special_token {
    T_NULL, /* first enumerator, value 0 */
/* res() for special.h takes 8 arguments; only <token> is used here */
#define  res(token, string, type, value, bytval, flags, cpu, sflags) T_ ## token ,
#include "special.h"
#undef res
/* define tokens for SpecialTable (directives) */
/* res() for directve.h takes 7 arguments (no <type>); only <token> is used */
#define  res(token, string, value, bytval, flags, cpu, sflags) T_ ## token ,
#include "directve.h"
#undef res
/* one past the last generated token; enum instr_token is anchored on it */
SPECIAL_LAST
};
50 
51 /* define tokens for instruction table (InstrTable[] in reswords.c) */
52 
/* Token ids for instructions. The enumerators are generated by including
 * instruct.h (and instravx.h if AVXSUPP is set) with the entry macros
 * defined so that they expand to enumerator names.
 */
enum instr_token {
    /* the enumerator that follows INS_FIRST_1 implicitly gets the value
     * SPECIAL_LAST, so instruction tokens continue after the special tokens.
     */
    INS_FIRST_1 = SPECIAL_LAST - 1, /* to ensure tokens are unique */
/* ins() and insx() entries each produce one enumerator T_<token> */
#define  ins(token, string, opcls, byte1_info,op_dir,rm_info,opcode,rm_byte,cpu,prefix ) T_ ## token ,
#define insx(token, string, opcls, byte1_info,op_dir,rm_info,opcode,rm_byte,cpu,prefix,flgs ) T_ ## token ,
/* insn() and insm() expand to nothing here: such entries add no token */
#define insn(tok, suffix,   opcls, byte1_info,op_dir,rm_info,opcode,rm_byte,cpu,prefix)
#define insm(tok, suffix,   opcls, byte1_info,op_dir,rm_info,opcode,rm_byte,cpu,prefix)
#include "instruct.h"
#undef insm
#undef insn
#undef insx
#undef ins
#if AVXSUPP
/* note: VEX_START is not #undef'd, it stays defined for includers */
#define VEX_START  T_VBROADCASTSS  /* first VEX encoded item */
/* avxins() entries produce T_V<token>, i.e. the name gets a 'V' prefix */
#define avxins(token, string, cpu, flgs ) T_V ## token ,
#include "instravx.h"
#undef avxins
#endif
};
71 
72 /*---------------------------------------------------------------------------*/
73 
74 /* queue of symbols */
/* one list of dsym items, described by its two end pointers */
struct symbol_queue {
    struct dsym *head, *tail;   /* first and last item of the list */
};
79 
/* kinds of symbol lists; values are consecutive, starting at 0 */
enum queue_type {
    TAB_UNDEF,  /* 0: no list */
    TAB_EXT,    /* 1: externals (EXTERNDEF, EXTERN, COMM, PROTO ) */
    TAB_SEG,    /* 2: SEGMENT items */
    TAB_GRP,    /* 3: GROUP items */
    TAB_PROC,   /* 4: PROC items */
    TAB_ALIAS,  /* 5: ALIAS items */
    TAB_LAST    /* 6: end marker, equals the number of values above */
};
89 
90 /* several lists, see enum queue_type above */
/* array of symbol queues, defined in another translation unit.
 * NOTE(review): presumably indexed by enum queue_type - confirm at the
 * definition.
 */
extern struct symbol_queue SymTables[];
92 
93 /*
94  values for <rm_info> (3 bits)
95  000            -> has rm_byte with w-, d- and/or s-bit in opcode
96  001( no_RM   ) -> no rm_byte - may have w-bit
97  010( no_WDS  ) -> has rm_byte, but w-bit, d-bit, s-bit of opcode are absent
98  011( R_in_OP ) -> no rm_byte, reg field (if any) is included in opcode
99  */
/* non-zero encodings of the 3-bit <rm_info> field; 0 has no enumerator */
enum rm_info {
    no_RM = 1,  /* 001: no rm_byte - may have w-bit */
    no_WDS,     /* 010: has rm_byte, but w-, d- and s-bit are absent */
    R_in_OP     /* 011: no rm_byte, reg field (if any) is part of opcode */
};
105 
106 /* values for <allowed_prefix> (3 bits) */
/* non-zero encodings of the 3-bit <allowed_prefix> field */
enum allowed_prefix {
    /* value 0 means "normal" (no prefix allowed); it has no enumerator */
    AP_LOCK = 1,
    AP_REP,         /* 2 */
    AP_REPxx,       /* 3 */
    AP_FWAIT,       /* 4 */
    AP_NO_FWAIT     /* 5 */
};
115 
/* Values for field <type> in special_item.
 * Their order should match the order of T_REGISTER through T_RES_ID in token.h.
 */
119 
/* kind of reserved word stored in a special_item; values start at 2 */
enum special_type {
    RWT_REG       = 2,  /* same value as for T_REG */
    RWT_DIRECTIVE = 3,
    RWT_UNARY_OP  = 4,
    RWT_BINARY_OP = 5,
    RWT_STYPE     = 6,
    RWT_RES_ID    = 7
};
128 
129 // values for sflags if register
/* layout of <sflags> when the special_item describes a register */
enum op1_flags {
    SFR_SIZMSK  = ( 1 << 5 ) - 1,   /* bits 0-4: mask for the size */
    SFR_IREG    = 1 << 5,           /* bit 5 */
    SFR_SSBASED = 1 << 6            /* bit 6; v2.11: added */
};
135 
136 #if AMD64_SUPPORT
/* single-bit flags, one per REX prefix bit */
enum rex_bits {
    REX_B = 1 << 0, /* regno 0-7 <-> 8-15 of ModR/M or SIB base */
    REX_X = 1 << 1, /* regno 0-7 <-> 8-15 of SIB index */
    REX_R = 1 << 2, /* regno 0-7 <-> 8-15 of ModR/M REG */
    REX_W = 1 << 3  /* wide 32 <-> 64 */
};
143 #endif
144 
145 /* operand classes. this table is defined in reswords.c.
146  * index into this array is member opclsidx in instr_item.
147  * v2.06: data removed from struct instr_item.
148  */
/* one combination of operand types; the first two operands are stored as
 * enum operand_type values, the third one in a single byte.
 */
struct opnd_class {
    enum operand_type opnd_type[2];  /* operands 1 + 2 */
    unsigned char opnd_type_3rd;     /* operand 3 */
};
153 
/* instr_item is the structure used to store instructions
 * in InstrTable (instruct.h).
 * Most compilers use an unsigned type for enums; only Open Watcom (OW)
 * allows the smallest possible size to be used.
 */
159 
/* one entry of InstrTable[]. Field order and widths define the table layout,
 * so they must stay in sync with the table initializers.
 */
struct instr_item {
    /* the operand types formerly stored here were replaced by <opclsidx>: */
    //enum operand_type opnd_type[2];  /* operands 1 + 2 */
    //unsigned char opnd_type_3rd;     /* operand 3 */
    unsigned char opclsidx;     /* v2.06: index for opnd_clstab */
    unsigned char byte1_info;   /* flags for 1st byte */
    /* four bit-fields, 3+1+3+1 = 8 bits in total */
    unsigned char
        allowed_prefix  : 3,    /* allowed prefix (see enum allowed_prefix) */
        first           : 1,    /* 1=opcode's first entry */
        rm_info         : 3,    /* info on r/m byte (see enum rm_info) */
        opnd_dir        : 1;    /* operand direction */
    unsigned char reserved;     /* not used yet */
    /* Open Watcom stores the enum itself; other compilers use a 16-bit field */
#ifdef __WATCOMC__
    enum cpu_info   cpu;        /* CPU type */
#else
    unsigned short  cpu;        /* CPU type */
#endif
    unsigned char   opcode;     /* opcode byte */
    unsigned char   rm_byte;    /* mod_rm_byte */
};
179 
180 /* special_item is the structure used to store directives and
181  * other reserved words in SpecialTable (special.h).
182  */
/* one entry of SpecialTable[]. The meaning of <value>, <sflags> and <bytval>
 * depends on <type>.
 */
struct special_item {
    unsigned     value;      /* enum directive_flags bits if type == RWT_DIRECTIVE */
    unsigned     sflags;     /* enum op1_flags bits if the item is a register */
    /* Open Watcom stores the enum itself; other compilers use fixed-size ints */
#ifdef __WATCOMC__
    enum cpu_info   cpu;     /* CPU type */
#else
    uint_16         cpu;     /* CPU type */
#endif
    uint_8          bytval;  /* enum directive_type if type == RWT_DIRECTIVE; */
                             /* also read by GetRegNo() and GetMemtypeSp() */
#ifdef __WATCOMC__
    enum special_type type;  /* kind of reserved word */
#else
    uint_8            type;  /* kind of reserved word (enum special_type value) */
#endif
};
198 
/* field accessors for SpecialTable[]; <x> is the index of the entry.
 * GetRegNo() and GetMemtypeSp() both read member <bytval>.
 */
#define GetRegNo( x )     ( SpecialTable[ (x) ].bytval )
#define GetSflagsSp( x )  ( SpecialTable[ (x) ].sflags )
#define GetValueSp( x )   ( SpecialTable[ (x) ].value )
#define GetMemtypeSp( x ) ( SpecialTable[ (x) ].bytval )
#define GetCpuSp( x )     ( SpecialTable[ (x) ].cpu )
204 
205 /* values for <value> if type == RWT_DIRECTIVE */
/* single-bit flags (bits 0-8) describing how a directive is processed */
enum directive_flags {
    DF_CEXPR    = 1 << 0, /* avoid '<' being used as string delimiter (.IF, ...) */
    DF_STRPARM  = 1 << 1, /* directive expects string param(s) (IFB, IFDIF, ...) */
                          /* enclose strings in <> in macro expansion step */
    DF_NOEXPAND = 1 << 2, /* don't expand params for directive (PURGE, FOR, IFDEF, ...) */
    DF_LABEL    = 1 << 3, /* directive requires a label */
    DF_NOSTRUC  = 1 << 4, /* directive not allowed inside structs/unions */
    DF_NOCONCAT = 1 << 5, /* don't concat line */
    DF_PROC     = 1 << 6, /* directive triggers prologue generation */
    DF_STORE    = 1 << 7, /* FASTPASS: directive triggers line store */
    DF_CGEN     = 1 << 8  /* directive generates lines */
};
218 
219 /* values for <bytval> if type == RWT_DIRECTIVE */
/* enum directive_type is generated from dirtype.h: every res() invocation in
 * that header yields one enumerator DRT_<token>; the <function> argument is
 * ignored here. res() is undefined again right after the enum.
 */
#define  res(token, function) DRT_ ## token ,
enum directive_type {
#include "dirtype.h"
};
#undef  res
225 
/* number of operand slots per instruction; dimension of code_info.opnd[] */
#define MAX_OPND 3
227 
/* one operand of the current instruction: its type, its immediate data
 * and an optional fixup pointer.
 */
struct opnd_item {
    enum operand_type type;
    /* anonymous union: the data is accessible either as two 32-bit values
     * or as one 64-bit value occupying the same storage.
     */
    union {
        struct {
            int_32    data32l;
            int_32    data32h; /* needed for OP_I48 and OP_I64 */
        };
        uint_64       data64;
    };
    struct fixup      *InsFixup; /* NOTE(review): presumably NULL if the operand needs no fixup - confirm */
};
239 
240 /* code_info describes the current instruction. It's the communication
241  * structure between parser and code generator.
242  */
/* state of the instruction currently being processed: prefixes, table entry,
 * operands, encoding bytes and a set of one-bit flags.
 */
struct code_info {
    struct {
        enum instr_token ins;          /* prefix before instruction, e.g. lock, rep, repnz */
        enum assume_segreg RegOverride;/* segment override (0=ES,1=CS,2=SS,3=DS,...) */
#if AMD64_SUPPORT
        unsigned char   rex;           /* NOTE(review): presumably a combination of enum rex_bits - confirm */
#endif
        unsigned char   adrsiz:1;      /* address size prefix 0x67 is to be emitted */
        unsigned char   opsiz:1;       /* operand size prefix 0x66 is to be emitted */
    } prefix;
    const struct instr_item *pinstr;   /* current pointer into InstrTable */
    enum instr_token token;
    enum memtype    mem_type;          /* byte / word / etc. NOT near/far */
    struct opnd_item opnd[MAX_OPND];   /* one slot per operand */
    unsigned char   rm_byte;
    unsigned char   sib;
    unsigned char   Ofssize;           /* tested for zero/non-zero by IS_OPER_32() */
    unsigned char   opc_or;
#if AVXSUPP
    unsigned char   vexregop; /* in 1-based format (0=empty) */
#endif
    /* anonymous union: <flags> overlays the bit-fields below, so all of
     * them can be read or written with a single byte access.
     */
    union {
        unsigned char flags;
        struct {
            unsigned char   iswide:1;       /* 0=byte, 1=word/dword/qword */
            unsigned char   isdirect:1;     /* 1=direct addressing mode */
            unsigned char   isfar:1;        /* CALL/JMP far */
            unsigned char   const_size_fixed:1; /* v2.01 */
#if AMD64_SUPPORT
            unsigned char   x86hi_used:1;   /* AH,BH,CH,DH used */
            unsigned char   x64lo_used:1;   /* SPL,BPL,SIL,DIL used */
#endif
            unsigned char   undef_sym:1;    /* v2.06b: struct member is forward ref */
        };
    };
};
279 
/* zero-based operand numbers.
 * NOTE(review): presumably used as indices into code_info.opnd[] - confirm.
 */
#define OPND1 0
#define OPND2 1
#define OPND3 2
283 
284 /* branch instructions are still sorted:
285  * CALL, JMP, Jcc, J[e|r]CXZ, LOOP, LOOPcc
286  */
287 
/* classification of branch instruction tokens. The range tests depend on
 * the token order CALL, JMP, Jcc, J[e|r]CXZ, LOOP, LOOPcc.
 * <inst> is fully parenthesized, so any expression may be passed, e.g.
 * IS_CALL( tok & mask ). Most macros evaluate <inst> twice: do not pass
 * expressions with side effects.
 */
#define IS_CALL( inst )       ( (inst) == T_CALL )
#define IS_JMPCALL( inst )    ( (inst) == T_CALL || (inst) == T_JMP )
#define IS_JMP( inst )        ( (inst) >= T_JMP  && (inst) <  T_LOOP )
#define IS_JCC( inst )        ( (inst) >  T_JMP  && (inst) <  T_JCXZ )
#define IS_BRANCH( inst )     ( (inst) >= T_CALL && (inst) <  T_LOOP )
#define IS_ANY_BRANCH( inst ) ( (inst) >= T_CALL && (inst) <= T_LOOPNZW )
#define IS_XCX_BRANCH( inst ) ( (inst) >= T_JCXZ && (inst) <= T_LOOPNZW )
295 
/* <s> is a pointer to a struct code_info. The result is non-zero if
 *  - s->Ofssize is non-zero and no operand size prefix is set, or
 *  - s->Ofssize is zero and the operand size prefix is set.
 * <s> is parenthesized, so expressions like &CodeInfo may be passed;
 * it is evaluated twice.
 */
#define IS_OPER_32( s )   ( (s)->Ofssize ? ( (s)->prefix.opsiz == FALSE ) : ( (s)->prefix.opsiz == TRUE ) )
297 
298 /* globals */
299 //extern struct asym           WordSize;
300 //#define CurrWordSize WordSize.value
/* tables defined in another translation unit. optable_idx[] must not be
 * indexed directly, use IndexFromToken() instead.
 */
extern const struct instr_item   InstrTable[];   /* instruction table */
extern const struct special_item SpecialTable[]; /* rest of res words */
extern uint_16                   optable_idx[];  /* helper, access thru IndexFromToken() only */
304 
/* look up optable_idx[] for token <tok>; the array index is the distance
 * of <tok> from SPECIAL_LAST, so <tok> must not be less than SPECIAL_LAST.
 */
#define IndexFromToken( tok )  ( optable_idx[ (tok) - SPECIAL_LAST ] )
306 
/* conversion helpers, defined elsewhere.
 * NOTE(review): judging by the names they map between memory types, sizes,
 * registers and language types - confirm the exact contracts at the
 * definitions. The ret_code functions report a status and deliver their
 * result through the pointer argument.
 */
extern int        SizeFromMemtype( enum memtype, int, struct asym * );
extern ret_code   MemtypeFromSize( int, enum memtype * );
extern int        SizeFromRegister( int );
extern ret_code   GetLangType( int *, struct asm_tok[], enum lang_type * );
311 
/* symbol queue maintenance, defined elsewhere.
 * NOTE(review): presumably sym_add_table()/sym_remove_table() insert into
 * and remove an item from the given queue - confirm at the definitions.
 */
extern void       sym_add_table( struct symbol_queue *, struct dsym * );
extern void       sym_remove_table( struct symbol_queue *, struct dsym * );
extern void       sym_ext2int( struct asym * );
315 
/* parser entry points and helpers, defined elsewhere. ParseLine() and
 * ProcessFile() take an array of asm_tok items; ParseLine() returns a
 * ret_code status.
 */
extern int        OperandSize( enum operand_type, const struct code_info * );
extern void       set_frame( const struct asym *sym );
extern void       set_frame2( const struct asym *sym );
extern ret_code   ParseLine( struct asm_tok[] );
extern void       ProcessFile( struct asm_tok[] );
321 
/* takes one line of text (not modified by the callee, as it is const).
 * NOTE(review): presumably emits the line as preprocessed output - confirm.
 */
extern void       WritePreprocessedLine( const char * );
323 
324 #endif
325