1 //===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file declares the MIToken structure and the function that lexes the
// machine instruction source string.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
15 #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16 
17 #include "llvm/ADT/APSInt.h"
18 #include "llvm/ADT/StringRef.h"
19 #include <string>
20 
21 namespace llvm {
22 
23 class Twine;
24 
25 /// A token produced by the machine instruction lexer.
26 struct MIToken {
27   enum TokenKind {
28     // Markers
29     Eof,
30     Error,
31     Newline,
32 
33     // Tokens with no info.
34     comma,
35     equal,
36     underscore,
37     colon,
38     coloncolon,
39     dot,
40     exclaim,
41     lparen,
42     rparen,
43     lbrace,
44     rbrace,
45     plus,
46     minus,
47     less,
48     greater,
49 
50     // Keywords
51     kw_implicit,
52     kw_implicit_define,
53     kw_def,
54     kw_dead,
55     kw_dereferenceable,
56     kw_killed,
57     kw_undef,
58     kw_internal,
59     kw_early_clobber,
60     kw_debug_use,
61     kw_renamable,
62     kw_tied_def,
63     kw_frame_setup,
64     kw_frame_destroy,
65     kw_nnan,
66     kw_ninf,
67     kw_nsz,
68     kw_arcp,
69     kw_contract,
70     kw_afn,
71     kw_reassoc,
72     kw_nuw,
73     kw_nsw,
74     kw_exact,
75     kw_nofpexcept,
76     kw_unpredictable,
77     kw_debug_location,
78     kw_debug_instr_number,
79     kw_dbg_instr_ref,
80     kw_cfi_same_value,
81     kw_cfi_offset,
82     kw_cfi_rel_offset,
83     kw_cfi_def_cfa_register,
84     kw_cfi_def_cfa_offset,
85     kw_cfi_adjust_cfa_offset,
86     kw_cfi_escape,
87     kw_cfi_def_cfa,
88     kw_cfi_llvm_def_aspace_cfa,
89     kw_cfi_register,
90     kw_cfi_remember_state,
91     kw_cfi_restore,
92     kw_cfi_restore_state,
93     kw_cfi_undefined,
94     kw_cfi_window_save,
95     kw_cfi_aarch64_negate_ra_sign_state,
96     kw_blockaddress,
97     kw_intrinsic,
98     kw_target_index,
99     kw_half,
100     kw_float,
101     kw_double,
102     kw_x86_fp80,
103     kw_fp128,
104     kw_ppc_fp128,
105     kw_target_flags,
106     kw_volatile,
107     kw_non_temporal,
108     kw_invariant,
109     kw_align,
110     kw_basealign,
111     kw_addrspace,
112     kw_stack,
113     kw_got,
114     kw_jump_table,
115     kw_constant_pool,
116     kw_call_entry,
117     kw_custom,
118     kw_liveout,
119     kw_landing_pad,
120     kw_inlineasm_br_indirect_target,
121     kw_ehfunclet_entry,
122     kw_liveins,
123     kw_successors,
124     kw_floatpred,
125     kw_intpred,
126     kw_shufflemask,
127     kw_pre_instr_symbol,
128     kw_post_instr_symbol,
129     kw_heap_alloc_marker,
130     kw_pcsections,
131     kw_cfi_type,
132     kw_bbsections,
133     kw_bb_id,
134     kw_unknown_size,
135     kw_unknown_address,
136     kw_ir_block_address_taken,
137     kw_machine_block_address_taken,
138     kw_call_frame_size,
139     kw_noconvergent,
140 
141     // Metadata types.
142     kw_distinct,
143 
144     // Named metadata keywords
145     md_tbaa,
146     md_alias_scope,
147     md_noalias,
148     md_range,
149     md_diexpr,
150     md_dilocation,
151 
152     // Identifier tokens
153     Identifier,
154     NamedRegister,
155     NamedVirtualRegister,
156     MachineBasicBlockLabel,
157     MachineBasicBlock,
158     StackObject,
159     FixedStackObject,
160     NamedGlobalValue,
161     GlobalValue,
162     ExternalSymbol,
163     MCSymbol,
164 
165     // Other tokens
166     IntegerLiteral,
167     FloatingPointLiteral,
168     HexLiteral,
169     VectorLiteral,
170     VirtualRegister,
171     ConstantPoolItem,
172     JumpTableIndex,
173     NamedIRBlock,
174     IRBlock,
175     NamedIRValue,
176     IRValue,
177     QuotedIRValue, // `<constant value>`
178     SubRegisterIndex,
179     StringConstant
180   };
181 
182 private:
183   TokenKind Kind = Error;
184   StringRef Range;
185   StringRef StringValue;
186   std::string StringValueStorage;
187   APSInt IntVal;
188 
189 public:
190   MIToken() = default;
191 
192   MIToken &reset(TokenKind Kind, StringRef Range);
193 
194   MIToken &setStringValue(StringRef StrVal);
195   MIToken &setOwnedStringValue(std::string StrVal);
196   MIToken &setIntegerValue(APSInt IntVal);
197 
kindMIToken198   TokenKind kind() const { return Kind; }
199 
isErrorMIToken200   bool isError() const { return Kind == Error; }
201 
isNewlineOrEOFMIToken202   bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
203 
isErrorOrEOFMIToken204   bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
205 
isRegisterMIToken206   bool isRegister() const {
207     return Kind == NamedRegister || Kind == underscore ||
208            Kind == NamedVirtualRegister || Kind == VirtualRegister;
209   }
210 
isRegisterFlagMIToken211   bool isRegisterFlag() const {
212     return Kind == kw_implicit || Kind == kw_implicit_define ||
213            Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
214            Kind == kw_undef || Kind == kw_internal ||
215            Kind == kw_early_clobber || Kind == kw_debug_use ||
216            Kind == kw_renamable;
217   }
218 
isMemoryOperandFlagMIToken219   bool isMemoryOperandFlag() const {
220     return Kind == kw_volatile || Kind == kw_non_temporal ||
221            Kind == kw_dereferenceable || Kind == kw_invariant ||
222            Kind == StringConstant;
223   }
224 
isMIToken225   bool is(TokenKind K) const { return Kind == K; }
226 
isNotMIToken227   bool isNot(TokenKind K) const { return Kind != K; }
228 
locationMIToken229   StringRef::iterator location() const { return Range.begin(); }
230 
rangeMIToken231   StringRef range() const { return Range; }
232 
233   /// Return the token's string value.
stringValueMIToken234   StringRef stringValue() const { return StringValue; }
235 
integerValueMIToken236   const APSInt &integerValue() const { return IntVal; }
237 
hasIntegerValueMIToken238   bool hasIntegerValue() const {
239     return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
240            Kind == MachineBasicBlockLabel || Kind == StackObject ||
241            Kind == FixedStackObject || Kind == GlobalValue ||
242            Kind == VirtualRegister || Kind == ConstantPoolItem ||
243            Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
244   }
245 };
246 
/// Consume a single machine instruction token in the given source and return
/// the remaining source string.
///
/// \param Source the source text to lex a token from.
/// \param Token non-const reference to the token object; presumably filled
///        in with the token that was consumed - confirm against the
///        definition in MILexer.cpp.
/// \param ErrorCallback callable taking a position within the source text
///        and a message. NOTE(review): presumably invoked when a lexing
///        error is encountered - confirm against the definition.
/// \returns the part of the source string that remains after the token.
StringRef lexMIToken(
    StringRef Source, MIToken &Token,
    function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
252 
253 } // end namespace llvm
254 
255 #endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
256