1 //===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the function that lexes the machine instruction source
10 // string.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
15 #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
16 
17 #include "llvm/ADT/APSInt.h"
18 #include "llvm/ADT/StringRef.h"
19 #include <string>
20 
21 namespace llvm {
22 
23 class Twine;
24 
25 /// A token produced by the machine instruction lexer.
26 struct MIToken {
27   enum TokenKind {
28     // Markers
29     Eof,
30     Error,
31     Newline,
32 
33     // Tokens with no info.
34     comma,
35     equal,
36     underscore,
37     colon,
38     coloncolon,
39     dot,
40     exclaim,
41     lparen,
42     rparen,
43     lbrace,
44     rbrace,
45     plus,
46     minus,
47     less,
48     greater,
49 
50     // Keywords
51     kw_implicit,
52     kw_implicit_define,
53     kw_def,
54     kw_dead,
55     kw_dereferenceable,
56     kw_killed,
57     kw_undef,
58     kw_internal,
59     kw_early_clobber,
60     kw_debug_use,
61     kw_renamable,
62     kw_tied_def,
63     kw_frame_setup,
64     kw_frame_destroy,
65     kw_nnan,
66     kw_ninf,
67     kw_nsz,
68     kw_arcp,
69     kw_contract,
70     kw_afn,
71     kw_reassoc,
72     kw_nusw,
73     kw_nuw,
74     kw_nsw,
75     kw_exact,
76     kw_nofpexcept,
77     kw_unpredictable,
78     kw_nneg,
79     kw_disjoint,
80     kw_debug_location,
81     kw_debug_instr_number,
82     kw_dbg_instr_ref,
83     kw_cfi_same_value,
84     kw_cfi_offset,
85     kw_cfi_rel_offset,
86     kw_cfi_def_cfa_register,
87     kw_cfi_def_cfa_offset,
88     kw_cfi_adjust_cfa_offset,
89     kw_cfi_escape,
90     kw_cfi_def_cfa,
91     kw_cfi_llvm_def_aspace_cfa,
92     kw_cfi_register,
93     kw_cfi_remember_state,
94     kw_cfi_restore,
95     kw_cfi_restore_state,
96     kw_cfi_undefined,
97     kw_cfi_window_save,
98     kw_cfi_aarch64_negate_ra_sign_state,
99     kw_blockaddress,
100     kw_intrinsic,
101     kw_target_index,
102     kw_half,
103     kw_bfloat,
104     kw_float,
105     kw_double,
106     kw_x86_fp80,
107     kw_fp128,
108     kw_ppc_fp128,
109     kw_target_flags,
110     kw_volatile,
111     kw_non_temporal,
112     kw_invariant,
113     kw_align,
114     kw_basealign,
115     kw_addrspace,
116     kw_stack,
117     kw_got,
118     kw_jump_table,
119     kw_constant_pool,
120     kw_call_entry,
121     kw_custom,
122     kw_liveout,
123     kw_landing_pad,
124     kw_inlineasm_br_indirect_target,
125     kw_ehfunclet_entry,
126     kw_liveins,
127     kw_successors,
128     kw_floatpred,
129     kw_intpred,
130     kw_shufflemask,
131     kw_pre_instr_symbol,
132     kw_post_instr_symbol,
133     kw_heap_alloc_marker,
134     kw_pcsections,
135     kw_cfi_type,
136     kw_bbsections,
137     kw_bb_id,
138     kw_unknown_size,
139     kw_unknown_address,
140     kw_ir_block_address_taken,
141     kw_machine_block_address_taken,
142     kw_call_frame_size,
143     kw_noconvergent,
144 
145     // Metadata types.
146     kw_distinct,
147 
148     // Named metadata keywords
149     md_tbaa,
150     md_alias_scope,
151     md_noalias,
152     md_range,
153     md_diexpr,
154     md_dilocation,
155 
156     // Identifier tokens
157     Identifier,
158     NamedRegister,
159     NamedVirtualRegister,
160     MachineBasicBlockLabel,
161     MachineBasicBlock,
162     StackObject,
163     FixedStackObject,
164     NamedGlobalValue,
165     GlobalValue,
166     ExternalSymbol,
167     MCSymbol,
168 
169     // Other tokens
170     IntegerLiteral,
171     FloatingPointLiteral,
172     HexLiteral,
173     VectorLiteral,
174     VirtualRegister,
175     ConstantPoolItem,
176     JumpTableIndex,
177     NamedIRBlock,
178     IRBlock,
179     NamedIRValue,
180     IRValue,
181     QuotedIRValue, // `<constant value>`
182     SubRegisterIndex,
183     StringConstant
184   };
185 
186 private:
187   TokenKind Kind = Error;
188   StringRef Range;
189   StringRef StringValue;
190   std::string StringValueStorage;
191   APSInt IntVal;
192 
193 public:
194   MIToken() = default;
195 
196   MIToken &reset(TokenKind Kind, StringRef Range);
197 
198   MIToken &setStringValue(StringRef StrVal);
199   MIToken &setOwnedStringValue(std::string StrVal);
200   MIToken &setIntegerValue(APSInt IntVal);
201 
kindMIToken202   TokenKind kind() const { return Kind; }
203 
isErrorMIToken204   bool isError() const { return Kind == Error; }
205 
isNewlineOrEOFMIToken206   bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; }
207 
isErrorOrEOFMIToken208   bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; }
209 
isRegisterMIToken210   bool isRegister() const {
211     return Kind == NamedRegister || Kind == underscore ||
212            Kind == NamedVirtualRegister || Kind == VirtualRegister;
213   }
214 
isRegisterFlagMIToken215   bool isRegisterFlag() const {
216     return Kind == kw_implicit || Kind == kw_implicit_define ||
217            Kind == kw_def || Kind == kw_dead || Kind == kw_killed ||
218            Kind == kw_undef || Kind == kw_internal ||
219            Kind == kw_early_clobber || Kind == kw_debug_use ||
220            Kind == kw_renamable;
221   }
222 
isMemoryOperandFlagMIToken223   bool isMemoryOperandFlag() const {
224     return Kind == kw_volatile || Kind == kw_non_temporal ||
225            Kind == kw_dereferenceable || Kind == kw_invariant ||
226            Kind == StringConstant;
227   }
228 
isMIToken229   bool is(TokenKind K) const { return Kind == K; }
230 
isNotMIToken231   bool isNot(TokenKind K) const { return Kind != K; }
232 
locationMIToken233   StringRef::iterator location() const { return Range.begin(); }
234 
rangeMIToken235   StringRef range() const { return Range; }
236 
237   /// Return the token's string value.
stringValueMIToken238   StringRef stringValue() const { return StringValue; }
239 
integerValueMIToken240   const APSInt &integerValue() const { return IntVal; }
241 
hasIntegerValueMIToken242   bool hasIntegerValue() const {
243     return Kind == IntegerLiteral || Kind == MachineBasicBlock ||
244            Kind == MachineBasicBlockLabel || Kind == StackObject ||
245            Kind == FixedStackObject || Kind == GlobalValue ||
246            Kind == VirtualRegister || Kind == ConstantPoolItem ||
247            Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue;
248   }
249 };
250 
251 /// Consume a single machine instruction token in the given source and return
252 /// the remaining source string.
253 StringRef lexMIToken(
254     StringRef Source, MIToken &Token,
255     function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback);
256 
257 } // end namespace llvm
258 
259 #endif // LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H
260