1 //===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file declares the MIToken type and the function that lexes the machine
// instruction source string.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
15 #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16 
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"
#include <string>
20 
21 namespace llvm {
22 
23 class Twine;
24 
25 /// A token produced by the machine instruction lexer.
26 struct MIToken {
27   enum TokenKind {
28     // Markers
29     Eof,
30     Error,
31     Newline,
32 
33     // Tokens with no info.
34     comma,
35     equal,
36     underscore,
37     colon,
38     coloncolon,
39     dot,
40     exclaim,
41     lparen,
42     rparen,
43     lbrace,
44     rbrace,
45     plus,
46     minus,
47     less,
48     greater,
49 
50     // Keywords
51     kw_implicit,
52     kw_implicit_define,
53     kw_def,
54     kw_dead,
55     kw_dereferenceable,
56     kw_killed,
57     kw_undef,
58     kw_internal,
59     kw_early_clobber,
60     kw_debug_use,
61     kw_renamable,
62     kw_tied_def,
63     kw_frame_setup,
64     kw_frame_destroy,
65     kw_nnan,
66     kw_ninf,
67     kw_nsz,
68     kw_arcp,
69     kw_contract,
70     kw_afn,
71     kw_reassoc,
72     kw_nuw,
73     kw_nsw,
74     kw_exact,
75     kw_nofpexcept,
76     kw_debug_location,
77     kw_debug_instr_number,
78     kw_dbg_instr_ref,
79     kw_cfi_same_value,
80     kw_cfi_offset,
81     kw_cfi_rel_offset,
82     kw_cfi_def_cfa_register,
83     kw_cfi_def_cfa_offset,
84     kw_cfi_adjust_cfa_offset,
85     kw_cfi_escape,
86     kw_cfi_def_cfa,
87     kw_cfi_llvm_def_aspace_cfa,
88     kw_cfi_register,
89     kw_cfi_remember_state,
90     kw_cfi_restore,
91     kw_cfi_restore_state,
92     kw_cfi_undefined,
93     kw_cfi_window_save,
94     kw_cfi_aarch64_negate_ra_sign_state,
95     kw_blockaddress,
96     kw_intrinsic,
97     kw_target_index,
98     kw_half,
99     kw_float,
100     kw_double,
101     kw_x86_fp80,
102     kw_fp128,
103     kw_ppc_fp128,
104     kw_target_flags,
105     kw_volatile,
106     kw_non_temporal,
107     kw_invariant,
108     kw_align,
109     kw_basealign,
110     kw_addrspace,
111     kw_stack,
112     kw_got,
113     kw_jump_table,
114     kw_constant_pool,
115     kw_call_entry,
116     kw_custom,
117     kw_liveout,
118     kw_landing_pad,
119     kw_inlineasm_br_indirect_target,
120     kw_ehfunclet_entry,
121     kw_liveins,
122     kw_successors,
123     kw_floatpred,
124     kw_intpred,
125     kw_shufflemask,
126     kw_pre_instr_symbol,
127     kw_post_instr_symbol,
128     kw_heap_alloc_marker,
129     kw_pcsections,
130     kw_cfi_type,
131     kw_bbsections,
132     kw_bb_id,
133     kw_unknown_size,
134     kw_unknown_address,
135     kw_ir_block_address_taken,
136     kw_machine_block_address_taken,
137 
138     // Metadata types.
139     kw_distinct,
140 
141     // Named metadata keywords
142     md_tbaa,
143     md_alias_scope,
144     md_noalias,
145     md_range,
146     md_diexpr,
147     md_dilocation,
148 
149     // Identifier tokens
150     Identifier,
151     NamedRegister,
152     NamedVirtualRegister,
153     MachineBasicBlockLabel,
154     MachineBasicBlock,
155     StackObject,
156     FixedStackObject,
157     NamedGlobalValue,
158     GlobalValue,
159     ExternalSymbol,
160     MCSymbol,
161 
162     // Other tokens
163     IntegerLiteral,
164     FloatingPointLiteral,
165     HexLiteral,
166     VectorLiteral,
167     VirtualRegister,
168     ConstantPoolItem,
169     JumpTableIndex,
170     NamedIRBlock,
171     IRBlock,
172     NamedIRValue,
173     IRValue,
174     QuotedIRValue, // `<constant value>`
175     SubRegisterIndex,
176     StringConstant
177   };
178 
179 private:
180   TokenKind Kind = Error;
181   StringRef Range;
182   StringRef StringValue;
183   std::string StringValueStorage;
184   APSInt IntVal;
185 
186 public:
187   MIToken() = default;
188 
189   MIToken &reset(TokenKind Kind, StringRef Range);
190 
191   MIToken &setStringValue(StringRef StrVal);
192   MIToken &setOwnedStringValue(std::string StrVal);
193   MIToken &setIntegerValue(APSInt IntVal);
194 
195   TokenKind kind() const { return Kind; }
196 
197   bool isError() const { return Kind == Error; }
198 
199   bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
200 
201   bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
202 
203   bool isRegister() const {
204     return Kind == NamedRegister || Kind == underscore ||
205            Kind == NamedVirtualRegister || Kind == VirtualRegister;
206   }
207 
208   bool isRegisterFlag() const {
209     return Kind == kw_implicit || Kind == kw_implicit_define ||
210            Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
211            Kind == kw_undef || Kind == kw_internal ||
212            Kind == kw_early_clobber || Kind == kw_debug_use ||
213            Kind == kw_renamable;
214   }
215 
216   bool isMemoryOperandFlag() const {
217     return Kind == kw_volatile || Kind == kw_non_temporal ||
218            Kind == kw_dereferenceable || Kind == kw_invariant ||
219            Kind == StringConstant;
220   }
221 
222   bool is(TokenKind K) const { return Kind == K; }
223 
224   bool isNot(TokenKind K) const { return Kind != K; }
225 
226   StringRef::iterator location() const { return Range.begin(); }
227 
228   StringRef range() const { return Range; }
229 
230   /// Return the token's string value.
231   StringRef stringValue() const { return StringValue; }
232 
233   const APSInt &integerValue() const { return IntVal; }
234 
235   bool hasIntegerValue() const {
236     return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
237            Kind == MachineBasicBlockLabel || Kind == StackObject ||
238            Kind == FixedStackObject || Kind == GlobalValue ||
239            Kind == VirtualRegister || Kind == ConstantPoolItem ||
240            Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
241   }
242 };
243 
244 /// Consume a single machine instruction token in the given source and return
245 /// the remaining source string.
246 StringRef lexMIToken(
247     StringRef Source, MIToken &Token,
248     function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
249 
250 } // end namespace llvm
251 
252 #endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
253