1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCParser/MCAsmLexer.h"
17 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
18 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCStreamer.h"
21 #include "llvm/MC/MCSubtargetInfo.h"
22 #include "llvm/Support/Casting.h"
23 #include "llvm/Support/TargetRegistry.h"
24 
25 using namespace llvm;
26 
27 namespace {
28 struct BPFOperand;
29 
30 class BPFAsmParser : public MCTargetAsmParser {
31 
32   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
33 
34   bool PreMatchCheck(OperandVector &Operands);
35 
36   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
37                                OperandVector &Operands, MCStreamer &Out,
38                                uint64_t &ErrorInfo,
39                                bool MatchingInlineAsm) override;
40 
41   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
42 
43   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
44                         SMLoc NameLoc, OperandVector &Operands) override;
45 
46   bool ParseDirective(AsmToken DirectiveID) override;
47 
48   // "=" is used as assignment operator for assembly statment, so can't be used
49   // for symbol assignment.
50   bool equalIsAsmAssignment() override { return false; }
51   // "*" is used for dereferencing memory that it will be the start of
52   // statement.
53   bool starIsStartOfStatement() override { return true; }
54 
55 #define GET_ASSEMBLER_HEADER
56 #include "BPFGenAsmMatcher.inc"
57 
58   OperandMatchResultTy parseImmediate(OperandVector &Operands);
59   OperandMatchResultTy parseRegister(OperandVector &Operands);
60   OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
61 
62 public:
63   enum BPFMatchResultTy {
64     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
65 #define GET_OPERAND_DIAGNOSTIC_TYPES
66 #include "BPFGenAsmMatcher.inc"
67 #undef GET_OPERAND_DIAGNOSTIC_TYPES
68   };
69 
70   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
71                const MCInstrInfo &MII, const MCTargetOptions &Options)
72       : MCTargetAsmParser(Options, STI, MII) {
73     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
74   }
75 };
76 
77 /// BPFOperand - Instances of this class represent a parsed machine
78 /// instruction
79 struct BPFOperand : public MCParsedAsmOperand {
80 
81   enum KindTy {
82     Token,
83     Register,
84     Immediate,
85   } Kind;
86 
87   struct RegOp {
88     unsigned RegNum;
89   };
90 
91   struct ImmOp {
92     const MCExpr *Val;
93   };
94 
95   SMLoc StartLoc, EndLoc;
96   union {
97     StringRef Tok;
98     RegOp Reg;
99     ImmOp Imm;
100   };
101 
102   BPFOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
103 
104 public:
105   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
106     Kind = o.Kind;
107     StartLoc = o.StartLoc;
108     EndLoc = o.EndLoc;
109 
110     switch (Kind) {
111     case Register:
112       Reg = o.Reg;
113       break;
114     case Immediate:
115       Imm = o.Imm;
116       break;
117     case Token:
118       Tok = o.Tok;
119       break;
120     }
121   }
122 
123   bool isToken() const override { return Kind == Token; }
124   bool isReg() const override { return Kind == Register; }
125   bool isImm() const override { return Kind == Immediate; }
126   bool isMem() const override { return false; }
127 
128   bool isConstantImm() const {
129     return isImm() && isa<MCConstantExpr>(getImm());
130   }
131 
132   int64_t getConstantImm() const {
133     const MCExpr *Val = getImm();
134     return static_cast<const MCConstantExpr *>(Val)->getValue();
135   }
136 
137   bool isSImm12() const {
138     return (isConstantImm() && isInt<12>(getConstantImm()));
139   }
140 
141   /// getStartLoc - Gets location of the first token of this operand
142   SMLoc getStartLoc() const override { return StartLoc; }
143   /// getEndLoc - Gets location of the last token of this operand
144   SMLoc getEndLoc() const override { return EndLoc; }
145 
146   unsigned getReg() const override {
147     assert(Kind == Register && "Invalid type access!");
148     return Reg.RegNum;
149   }
150 
151   const MCExpr *getImm() const {
152     assert(Kind == Immediate && "Invalid type access!");
153     return Imm.Val;
154   }
155 
156   StringRef getToken() const {
157     assert(Kind == Token && "Invalid type access!");
158     return Tok;
159   }
160 
161   void print(raw_ostream &OS) const override {
162     switch (Kind) {
163     case Immediate:
164       OS << *getImm();
165       break;
166     case Register:
167       OS << "<register x";
168       OS << getReg() << ">";
169       break;
170     case Token:
171       OS << "'" << getToken() << "'";
172       break;
173     }
174   }
175 
176   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
177     assert(Expr && "Expr shouldn't be null!");
178 
179     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
180       Inst.addOperand(MCOperand::createImm(CE->getValue()));
181     else
182       Inst.addOperand(MCOperand::createExpr(Expr));
183   }
184 
185   // Used by the TableGen Code
186   void addRegOperands(MCInst &Inst, unsigned N) const {
187     assert(N == 1 && "Invalid number of operands!");
188     Inst.addOperand(MCOperand::createReg(getReg()));
189   }
190 
191   void addImmOperands(MCInst &Inst, unsigned N) const {
192     assert(N == 1 && "Invalid number of operands!");
193     addExpr(Inst, getImm());
194   }
195 
196   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
197     auto Op = make_unique<BPFOperand>(Token);
198     Op->Tok = Str;
199     Op->StartLoc = S;
200     Op->EndLoc = S;
201     return Op;
202   }
203 
204   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
205                                                SMLoc E) {
206     auto Op = make_unique<BPFOperand>(Register);
207     Op->Reg.RegNum = RegNo;
208     Op->StartLoc = S;
209     Op->EndLoc = E;
210     return Op;
211   }
212 
213   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
214                                                SMLoc E) {
215     auto Op = make_unique<BPFOperand>(Immediate);
216     Op->Imm.Val = Val;
217     Op->StartLoc = S;
218     Op->EndLoc = E;
219     return Op;
220   }
221 
222   // Identifiers that can be used at the start of a statment.
223   static bool isValidIdAtStart(StringRef Name) {
224     return StringSwitch<bool>(Name.lower())
225         .Case("if", true)
226         .Case("call", true)
227         .Case("goto", true)
228         .Case("*", true)
229         .Case("exit", true)
230         .Case("lock", true)
231         .Case("ld_pseudo", true)
232         .Default(false);
233   }
234 
235   // Identifiers that can be used in the middle of a statment.
236   static bool isValidIdInMiddle(StringRef Name) {
237     return StringSwitch<bool>(Name.lower())
238         .Case("u64", true)
239         .Case("u32", true)
240         .Case("u16", true)
241         .Case("u8", true)
242         .Case("be64", true)
243         .Case("be32", true)
244         .Case("be16", true)
245         .Case("le64", true)
246         .Case("le32", true)
247         .Case("le16", true)
248         .Case("goto", true)
249         .Case("ll", true)
250         .Case("skb", true)
251         .Case("s", true)
252         .Default(false);
253   }
254 };
255 } // end anonymous namespace.
256 
257 #define GET_REGISTER_MATCHER
258 #define GET_MATCHER_IMPLEMENTATION
259 #include "BPFGenAsmMatcher.inc"
260 
261 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
262 
263   if (Operands.size() == 4) {
264     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
265     // reg1 must be the same as reg2
266     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
267     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
268     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
269     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
270     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
271         && Op1.getToken() == "="
272         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
273             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
274             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
275             || Op2.getToken() == "le64")
276         && Op0.getReg() != Op3.getReg())
277       return true;
278   }
279 
280   return false;
281 }
282 
283 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
284                                            OperandVector &Operands,
285                                            MCStreamer &Out, uint64_t &ErrorInfo,
286                                            bool MatchingInlineAsm) {
287   MCInst Inst;
288   SMLoc ErrorLoc;
289 
290   if (PreMatchCheck(Operands))
291     return Error(IDLoc, "additional inst constraint not met");
292 
293   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
294   default:
295     break;
296   case Match_Success:
297     Inst.setLoc(IDLoc);
298     Out.EmitInstruction(Inst, getSTI());
299     return false;
300   case Match_MissingFeature:
301     return Error(IDLoc, "instruction use requires an option to be enabled");
302   case Match_MnemonicFail:
303     return Error(IDLoc, "unrecognized instruction mnemonic");
304   case Match_InvalidOperand:
305     ErrorLoc = IDLoc;
306 
307     if (ErrorInfo != ~0U) {
308       if (ErrorInfo >= Operands.size())
309         return Error(ErrorLoc, "too few operands for instruction");
310 
311       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
312 
313       if (ErrorLoc == SMLoc())
314         ErrorLoc = IDLoc;
315     }
316 
317     return Error(ErrorLoc, "invalid operand for instruction");
318   }
319 
320   llvm_unreachable("Unknown match type detected!");
321 }
322 
323 bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
324                                  SMLoc &EndLoc) {
325   const AsmToken &Tok = getParser().getTok();
326   StartLoc = Tok.getLoc();
327   EndLoc = Tok.getEndLoc();
328   RegNo = 0;
329   StringRef Name = getLexer().getTok().getIdentifier();
330 
331   if (!MatchRegisterName(Name)) {
332     getParser().Lex(); // Eat identifier token.
333     return false;
334   }
335 
336   return Error(StartLoc, "invalid register name");
337 }
338 
339 OperandMatchResultTy
340 BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
341   SMLoc S = getLoc();
342 
343   if (getLexer().getKind() == AsmToken::Identifier) {
344     StringRef Name = getLexer().getTok().getIdentifier();
345 
346     if (BPFOperand::isValidIdInMiddle(Name)) {
347       getLexer().Lex();
348       Operands.push_back(BPFOperand::createToken(Name, S));
349       return MatchOperand_Success;
350     }
351 
352     return MatchOperand_NoMatch;
353   }
354 
355   switch (getLexer().getKind()) {
356   case AsmToken::Minus:
357   case AsmToken::Plus: {
358     if (getLexer().peekTok().is(AsmToken::Integer))
359       return MatchOperand_NoMatch;
360     LLVM_FALLTHROUGH;
361   }
362 
363   case AsmToken::Equal:
364   case AsmToken::Greater:
365   case AsmToken::Less:
366   case AsmToken::Pipe:
367   case AsmToken::Star:
368   case AsmToken::LParen:
369   case AsmToken::RParen:
370   case AsmToken::LBrac:
371   case AsmToken::RBrac:
372   case AsmToken::Slash:
373   case AsmToken::Amp:
374   case AsmToken::Percent:
375   case AsmToken::Caret: {
376     StringRef Name = getLexer().getTok().getString();
377     getLexer().Lex();
378     Operands.push_back(BPFOperand::createToken(Name, S));
379 
380     return MatchOperand_Success;
381   }
382 
383   case AsmToken::EqualEqual:
384   case AsmToken::ExclaimEqual:
385   case AsmToken::GreaterEqual:
386   case AsmToken::GreaterGreater:
387   case AsmToken::LessEqual:
388   case AsmToken::LessLess: {
389     Operands.push_back(BPFOperand::createToken(
390         getLexer().getTok().getString().substr(0, 1), S));
391     Operands.push_back(BPFOperand::createToken(
392         getLexer().getTok().getString().substr(1, 1), S));
393     getLexer().Lex();
394 
395     return MatchOperand_Success;
396   }
397 
398   default:
399     break;
400   }
401 
402   return MatchOperand_NoMatch;
403 }
404 
405 OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
406   SMLoc S = getLoc();
407   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
408 
409   switch (getLexer().getKind()) {
410   default:
411     return MatchOperand_NoMatch;
412   case AsmToken::Identifier:
413     StringRef Name = getLexer().getTok().getIdentifier();
414     unsigned RegNo = MatchRegisterName(Name);
415 
416     if (RegNo == 0)
417       return MatchOperand_NoMatch;
418 
419     getLexer().Lex();
420     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
421   }
422   return MatchOperand_Success;
423 }
424 
425 OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
426   switch (getLexer().getKind()) {
427   default:
428     return MatchOperand_NoMatch;
429   case AsmToken::LParen:
430   case AsmToken::Minus:
431   case AsmToken::Plus:
432   case AsmToken::Integer:
433   case AsmToken::String:
434   case AsmToken::Identifier:
435     break;
436   }
437 
438   const MCExpr *IdVal;
439   SMLoc S = getLoc();
440 
441   if (getParser().parseExpression(IdVal))
442     return MatchOperand_ParseFail;
443 
444   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
445   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
446 
447   return MatchOperand_Success;
448 }
449 
450 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
451 /// format.
452 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
453                                     SMLoc NameLoc, OperandVector &Operands) {
454   // The first operand could be either register or actually an operator.
455   unsigned RegNo = MatchRegisterName(Name);
456 
457   if (RegNo != 0) {
458     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
459     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
460   } else if (BPFOperand::isValidIdAtStart (Name))
461     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
462   else
463     return Error(NameLoc, "invalid register/token name");
464 
465   while (!getLexer().is(AsmToken::EndOfStatement)) {
466     // Attempt to parse token as operator
467     if (parseOperandAsOperator(Operands) == MatchOperand_Success)
468       continue;
469 
470     // Attempt to parse token as register
471     if (parseRegister(Operands) == MatchOperand_Success)
472       continue;
473 
474     // Attempt to parse token as an immediate
475     if (parseImmediate(Operands) != MatchOperand_Success) {
476       SMLoc Loc = getLexer().getLoc();
477       return Error(Loc, "unexpected token");
478     }
479   }
480 
481   if (getLexer().isNot(AsmToken::EndOfStatement)) {
482     SMLoc Loc = getLexer().getLoc();
483 
484     getParser().eatToEndOfStatement();
485 
486     return Error(Loc, "unexpected token");
487   }
488 
489   // Consume the EndOfStatement.
490   getParser().Lex();
491   return false;
492 }
493 
494 bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
495 
496 extern "C" void LLVMInitializeBPFAsmParser() {
497   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
498   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
499   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
500 }
501