1 //===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file declares the MIToken structure and the function that lexes the
// machine instruction source string into such tokens.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
15 #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16 
17 #include "llvm/ADT/APSInt.h"
18 #include "llvm/ADT/StringRef.h"
19 #include <string>
20 
21 namespace llvm {
22 
23 class Twine;
24 
25 /// A token produced by the machine instruction lexer.
26 struct MIToken {
27   enum TokenKind {
28     // Markers
29     Eof,
30     Error,
31     Newline,
32 
33     // Tokens with no info.
34     comma,
35     equal,
36     underscore,
37     colon,
38     coloncolon,
39     dot,
40     exclaim,
41     lparen,
42     rparen,
43     lbrace,
44     rbrace,
45     plus,
46     minus,
47     less,
48     greater,
49 
50     // Keywords
51     kw_implicit,
52     kw_implicit_define,
53     kw_def,
54     kw_dead,
55     kw_dereferenceable,
56     kw_killed,
57     kw_undef,
58     kw_internal,
59     kw_early_clobber,
60     kw_debug_use,
61     kw_renamable,
62     kw_tied_def,
63     kw_frame_setup,
64     kw_frame_destroy,
65     kw_nnan,
66     kw_ninf,
67     kw_nsz,
68     kw_arcp,
69     kw_contract,
70     kw_afn,
71     kw_reassoc,
72     kw_nuw,
73     kw_nsw,
74     kw_exact,
75     kw_nofpexcept,
76     kw_unpredictable,
77     kw_debug_location,
78     kw_debug_instr_number,
79     kw_dbg_instr_ref,
80     kw_cfi_same_value,
81     kw_cfi_offset,
82     kw_cfi_rel_offset,
83     kw_cfi_def_cfa_register,
84     kw_cfi_def_cfa_offset,
85     kw_cfi_adjust_cfa_offset,
86     kw_cfi_escape,
87     kw_cfi_def_cfa,
88     kw_cfi_llvm_def_aspace_cfa,
89     kw_cfi_register,
90     kw_cfi_remember_state,
91     kw_cfi_restore,
92     kw_cfi_restore_state,
93     kw_cfi_undefined,
94     kw_cfi_window_save,
95     kw_cfi_aarch64_negate_ra_sign_state,
96     kw_blockaddress,
97     kw_intrinsic,
98     kw_target_index,
99     kw_half,
100     kw_float,
101     kw_double,
102     kw_x86_fp80,
103     kw_fp128,
104     kw_ppc_fp128,
105     kw_target_flags,
106     kw_volatile,
107     kw_non_temporal,
108     kw_invariant,
109     kw_align,
110     kw_basealign,
111     kw_addrspace,
112     kw_stack,
113     kw_got,
114     kw_jump_table,
115     kw_constant_pool,
116     kw_call_entry,
117     kw_custom,
118     kw_liveout,
119     kw_landing_pad,
120     kw_inlineasm_br_indirect_target,
121     kw_ehfunclet_entry,
122     kw_liveins,
123     kw_successors,
124     kw_floatpred,
125     kw_intpred,
126     kw_shufflemask,
127     kw_pre_instr_symbol,
128     kw_post_instr_symbol,
129     kw_heap_alloc_marker,
130     kw_pcsections,
131     kw_cfi_type,
132     kw_bbsections,
133     kw_bb_id,
134     kw_unknown_size,
135     kw_unknown_address,
136     kw_ir_block_address_taken,
137     kw_machine_block_address_taken,
138 
139     // Metadata types.
140     kw_distinct,
141 
142     // Named metadata keywords
143     md_tbaa,
144     md_alias_scope,
145     md_noalias,
146     md_range,
147     md_diexpr,
148     md_dilocation,
149 
150     // Identifier tokens
151     Identifier,
152     NamedRegister,
153     NamedVirtualRegister,
154     MachineBasicBlockLabel,
155     MachineBasicBlock,
156     StackObject,
157     FixedStackObject,
158     NamedGlobalValue,
159     GlobalValue,
160     ExternalSymbol,
161     MCSymbol,
162 
163     // Other tokens
164     IntegerLiteral,
165     FloatingPointLiteral,
166     HexLiteral,
167     VectorLiteral,
168     VirtualRegister,
169     ConstantPoolItem,
170     JumpTableIndex,
171     NamedIRBlock,
172     IRBlock,
173     NamedIRValue,
174     IRValue,
175     QuotedIRValue, // `<constant value>`
176     SubRegisterIndex,
177     StringConstant
178   };
179 
180 private:
181   TokenKind Kind = Error;
182   StringRef Range;
183   StringRef StringValue;
184   std::string StringValueStorage;
185   APSInt IntVal;
186 
187 public:
188   MIToken() = default;
189 
190   MIToken &reset(TokenKind Kind, StringRef Range);
191 
192   MIToken &setStringValue(StringRef StrVal);
193   MIToken &setOwnedStringValue(std::string StrVal);
194   MIToken &setIntegerValue(APSInt IntVal);
195 
196   TokenKind kind() const { return Kind; }
197 
198   bool isError() const { return Kind == Error; }
199 
200   bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
201 
202   bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
203 
204   bool isRegister() const {
205     return Kind == NamedRegister || Kind == underscore ||
206            Kind == NamedVirtualRegister || Kind == VirtualRegister;
207   }
208 
209   bool isRegisterFlag() const {
210     return Kind == kw_implicit || Kind == kw_implicit_define ||
211            Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
212            Kind == kw_undef || Kind == kw_internal ||
213            Kind == kw_early_clobber || Kind == kw_debug_use ||
214            Kind == kw_renamable;
215   }
216 
217   bool isMemoryOperandFlag() const {
218     return Kind == kw_volatile || Kind == kw_non_temporal ||
219            Kind == kw_dereferenceable || Kind == kw_invariant ||
220            Kind == StringConstant;
221   }
222 
223   bool is(TokenKind K) const { return Kind == K; }
224 
225   bool isNot(TokenKind K) const { return Kind != K; }
226 
227   StringRef::iterator location() const { return Range.begin(); }
228 
229   StringRef range() const { return Range; }
230 
231   /// Return the token's string value.
232   StringRef stringValue() const { return StringValue; }
233 
234   const APSInt &integerValue() const { return IntVal; }
235 
236   bool hasIntegerValue() const {
237     return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
238            Kind == MachineBasicBlockLabel || Kind == StackObject ||
239            Kind == FixedStackObject || Kind == GlobalValue ||
240            Kind == VirtualRegister || Kind == ConstantPoolItem ||
241            Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
242   }
243 };
244 
245 /// Consume a single machine instruction token in the given source and return
246 /// the remaining source string.
247 StringRef lexMIToken(
248     StringRef Source, MIToken &Token,
249     function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
250 
251 } // end namespace llvm
252 
253 #endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
254