1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCParser/MCAsmLexer.h"
18 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/TargetRegistry.h"
24 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 struct BPFOperand;
30 
31 class BPFAsmParser : public MCTargetAsmParser {
32 
33   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34 
35   bool PreMatchCheck(OperandVector &Operands);
36 
37   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38                                OperandVector &Operands, MCStreamer &Out,
39                                uint64_t &ErrorInfo,
40                                bool MatchingInlineAsm) override;
41 
42   bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
43                      SMLoc &EndLoc) override;
44   OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
45                                         SMLoc &EndLoc) override;
46 
47   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
48                         SMLoc NameLoc, OperandVector &Operands) override;
49 
50   bool ParseDirective(AsmToken DirectiveID) override;
51 
52   // "=" is used as assignment operator for assembly statment, so can't be used
53   // for symbol assignment.
54   bool equalIsAsmAssignment() override { return false; }
55   // "*" is used for dereferencing memory that it will be the start of
56   // statement.
57   bool starIsStartOfStatement() override { return true; }
58 
59 #define GET_ASSEMBLER_HEADER
60 #include "BPFGenAsmMatcher.inc"
61 
62   OperandMatchResultTy parseImmediate(OperandVector &Operands);
63   OperandMatchResultTy parseRegister(OperandVector &Operands);
64   OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
65 
66 public:
67   enum BPFMatchResultTy {
68     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
69 #define GET_OPERAND_DIAGNOSTIC_TYPES
70 #include "BPFGenAsmMatcher.inc"
71 #undef GET_OPERAND_DIAGNOSTIC_TYPES
72   };
73 
74   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
75                const MCInstrInfo &MII, const MCTargetOptions &Options)
76       : MCTargetAsmParser(Options, STI, MII) {
77     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
78   }
79 };
80 
81 /// BPFOperand - Instances of this class represent a parsed machine
82 /// instruction
83 struct BPFOperand : public MCParsedAsmOperand {
84 
85   enum KindTy {
86     Token,
87     Register,
88     Immediate,
89   } Kind;
90 
91   struct RegOp {
92     unsigned RegNum;
93   };
94 
95   struct ImmOp {
96     const MCExpr *Val;
97   };
98 
99   SMLoc StartLoc, EndLoc;
100   union {
101     StringRef Tok;
102     RegOp Reg;
103     ImmOp Imm;
104   };
105 
106   BPFOperand(KindTy K) : Kind(K) {}
107 
108 public:
109   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
110     Kind = o.Kind;
111     StartLoc = o.StartLoc;
112     EndLoc = o.EndLoc;
113 
114     switch (Kind) {
115     case Register:
116       Reg = o.Reg;
117       break;
118     case Immediate:
119       Imm = o.Imm;
120       break;
121     case Token:
122       Tok = o.Tok;
123       break;
124     }
125   }
126 
127   bool isToken() const override { return Kind == Token; }
128   bool isReg() const override { return Kind == Register; }
129   bool isImm() const override { return Kind == Immediate; }
130   bool isMem() const override { return false; }
131 
132   bool isConstantImm() const {
133     return isImm() && isa<MCConstantExpr>(getImm());
134   }
135 
136   int64_t getConstantImm() const {
137     const MCExpr *Val = getImm();
138     return static_cast<const MCConstantExpr *>(Val)->getValue();
139   }
140 
141   bool isSImm12() const {
142     return (isConstantImm() && isInt<12>(getConstantImm()));
143   }
144 
145   /// getStartLoc - Gets location of the first token of this operand
146   SMLoc getStartLoc() const override { return StartLoc; }
147   /// getEndLoc - Gets location of the last token of this operand
148   SMLoc getEndLoc() const override { return EndLoc; }
149 
150   unsigned getReg() const override {
151     assert(Kind == Register && "Invalid type access!");
152     return Reg.RegNum;
153   }
154 
155   const MCExpr *getImm() const {
156     assert(Kind == Immediate && "Invalid type access!");
157     return Imm.Val;
158   }
159 
160   StringRef getToken() const {
161     assert(Kind == Token && "Invalid type access!");
162     return Tok;
163   }
164 
165   void print(raw_ostream &OS) const override {
166     switch (Kind) {
167     case Immediate:
168       OS << *getImm();
169       break;
170     case Register:
171       OS << "<register x";
172       OS << getReg() << ">";
173       break;
174     case Token:
175       OS << "'" << getToken() << "'";
176       break;
177     }
178   }
179 
180   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
181     assert(Expr && "Expr shouldn't be null!");
182 
183     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
184       Inst.addOperand(MCOperand::createImm(CE->getValue()));
185     else
186       Inst.addOperand(MCOperand::createExpr(Expr));
187   }
188 
189   // Used by the TableGen Code
190   void addRegOperands(MCInst &Inst, unsigned N) const {
191     assert(N == 1 && "Invalid number of operands!");
192     Inst.addOperand(MCOperand::createReg(getReg()));
193   }
194 
195   void addImmOperands(MCInst &Inst, unsigned N) const {
196     assert(N == 1 && "Invalid number of operands!");
197     addExpr(Inst, getImm());
198   }
199 
200   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
201     auto Op = std::make_unique<BPFOperand>(Token);
202     Op->Tok = Str;
203     Op->StartLoc = S;
204     Op->EndLoc = S;
205     return Op;
206   }
207 
208   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
209                                                SMLoc E) {
210     auto Op = std::make_unique<BPFOperand>(Register);
211     Op->Reg.RegNum = RegNo;
212     Op->StartLoc = S;
213     Op->EndLoc = E;
214     return Op;
215   }
216 
217   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
218                                                SMLoc E) {
219     auto Op = std::make_unique<BPFOperand>(Immediate);
220     Op->Imm.Val = Val;
221     Op->StartLoc = S;
222     Op->EndLoc = E;
223     return Op;
224   }
225 
226   // Identifiers that can be used at the start of a statment.
227   static bool isValidIdAtStart(StringRef Name) {
228     return StringSwitch<bool>(Name.lower())
229         .Case("if", true)
230         .Case("call", true)
231         .Case("goto", true)
232         .Case("*", true)
233         .Case("exit", true)
234         .Case("lock", true)
235         .Case("ld_pseudo", true)
236         .Default(false);
237   }
238 
239   // Identifiers that can be used in the middle of a statment.
240   static bool isValidIdInMiddle(StringRef Name) {
241     return StringSwitch<bool>(Name.lower())
242         .Case("u64", true)
243         .Case("u32", true)
244         .Case("u16", true)
245         .Case("u8", true)
246         .Case("be64", true)
247         .Case("be32", true)
248         .Case("be16", true)
249         .Case("le64", true)
250         .Case("le32", true)
251         .Case("le16", true)
252         .Case("goto", true)
253         .Case("ll", true)
254         .Case("skb", true)
255         .Case("s", true)
256         .Default(false);
257   }
258 };
259 } // end anonymous namespace.
260 
261 #define GET_REGISTER_MATCHER
262 #define GET_MATCHER_IMPLEMENTATION
263 #include "BPFGenAsmMatcher.inc"
264 
265 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
266 
267   if (Operands.size() == 4) {
268     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
269     // reg1 must be the same as reg2
270     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
271     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
272     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
273     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
274     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
275         && Op1.getToken() == "="
276         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
277             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
278             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
279             || Op2.getToken() == "le64")
280         && Op0.getReg() != Op3.getReg())
281       return true;
282   }
283 
284   return false;
285 }
286 
287 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
288                                            OperandVector &Operands,
289                                            MCStreamer &Out, uint64_t &ErrorInfo,
290                                            bool MatchingInlineAsm) {
291   MCInst Inst;
292   SMLoc ErrorLoc;
293 
294   if (PreMatchCheck(Operands))
295     return Error(IDLoc, "additional inst constraint not met");
296 
297   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
298   default:
299     break;
300   case Match_Success:
301     Inst.setLoc(IDLoc);
302     Out.emitInstruction(Inst, getSTI());
303     return false;
304   case Match_MissingFeature:
305     return Error(IDLoc, "instruction use requires an option to be enabled");
306   case Match_MnemonicFail:
307     return Error(IDLoc, "unrecognized instruction mnemonic");
308   case Match_InvalidOperand:
309     ErrorLoc = IDLoc;
310 
311     if (ErrorInfo != ~0U) {
312       if (ErrorInfo >= Operands.size())
313         return Error(ErrorLoc, "too few operands for instruction");
314 
315       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
316 
317       if (ErrorLoc == SMLoc())
318         ErrorLoc = IDLoc;
319     }
320 
321     return Error(ErrorLoc, "invalid operand for instruction");
322   }
323 
324   llvm_unreachable("Unknown match type detected!");
325 }
326 
327 bool BPFAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
328                                  SMLoc &EndLoc) {
329   if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
330     return Error(StartLoc, "invalid register name");
331   return false;
332 }
333 
334 OperandMatchResultTy BPFAsmParser::tryParseRegister(MCRegister &RegNo,
335                                                     SMLoc &StartLoc,
336                                                     SMLoc &EndLoc) {
337   const AsmToken &Tok = getParser().getTok();
338   StartLoc = Tok.getLoc();
339   EndLoc = Tok.getEndLoc();
340   RegNo = 0;
341   StringRef Name = getLexer().getTok().getIdentifier();
342 
343   if (!MatchRegisterName(Name)) {
344     getParser().Lex(); // Eat identifier token.
345     return MatchOperand_Success;
346   }
347 
348   return MatchOperand_NoMatch;
349 }
350 
351 OperandMatchResultTy
352 BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
353   SMLoc S = getLoc();
354 
355   if (getLexer().getKind() == AsmToken::Identifier) {
356     StringRef Name = getLexer().getTok().getIdentifier();
357 
358     if (BPFOperand::isValidIdInMiddle(Name)) {
359       getLexer().Lex();
360       Operands.push_back(BPFOperand::createToken(Name, S));
361       return MatchOperand_Success;
362     }
363 
364     return MatchOperand_NoMatch;
365   }
366 
367   switch (getLexer().getKind()) {
368   case AsmToken::Minus:
369   case AsmToken::Plus: {
370     if (getLexer().peekTok().is(AsmToken::Integer))
371       return MatchOperand_NoMatch;
372     [[fallthrough]];
373   }
374 
375   case AsmToken::Equal:
376   case AsmToken::Greater:
377   case AsmToken::Less:
378   case AsmToken::Pipe:
379   case AsmToken::Star:
380   case AsmToken::LParen:
381   case AsmToken::RParen:
382   case AsmToken::LBrac:
383   case AsmToken::RBrac:
384   case AsmToken::Slash:
385   case AsmToken::Amp:
386   case AsmToken::Percent:
387   case AsmToken::Caret: {
388     StringRef Name = getLexer().getTok().getString();
389     getLexer().Lex();
390     Operands.push_back(BPFOperand::createToken(Name, S));
391 
392     return MatchOperand_Success;
393   }
394 
395   case AsmToken::EqualEqual:
396   case AsmToken::ExclaimEqual:
397   case AsmToken::GreaterEqual:
398   case AsmToken::GreaterGreater:
399   case AsmToken::LessEqual:
400   case AsmToken::LessLess: {
401     Operands.push_back(BPFOperand::createToken(
402         getLexer().getTok().getString().substr(0, 1), S));
403     Operands.push_back(BPFOperand::createToken(
404         getLexer().getTok().getString().substr(1, 1), S));
405     getLexer().Lex();
406 
407     return MatchOperand_Success;
408   }
409 
410   default:
411     break;
412   }
413 
414   return MatchOperand_NoMatch;
415 }
416 
417 OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
418   SMLoc S = getLoc();
419   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
420 
421   switch (getLexer().getKind()) {
422   default:
423     return MatchOperand_NoMatch;
424   case AsmToken::Identifier:
425     StringRef Name = getLexer().getTok().getIdentifier();
426     unsigned RegNo = MatchRegisterName(Name);
427 
428     if (RegNo == 0)
429       return MatchOperand_NoMatch;
430 
431     getLexer().Lex();
432     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
433   }
434   return MatchOperand_Success;
435 }
436 
437 OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
438   switch (getLexer().getKind()) {
439   default:
440     return MatchOperand_NoMatch;
441   case AsmToken::LParen:
442   case AsmToken::Minus:
443   case AsmToken::Plus:
444   case AsmToken::Integer:
445   case AsmToken::String:
446   case AsmToken::Identifier:
447     break;
448   }
449 
450   const MCExpr *IdVal;
451   SMLoc S = getLoc();
452 
453   if (getParser().parseExpression(IdVal))
454     return MatchOperand_ParseFail;
455 
456   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
457   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
458 
459   return MatchOperand_Success;
460 }
461 
462 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
463 /// format.
464 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
465                                     SMLoc NameLoc, OperandVector &Operands) {
466   // The first operand could be either register or actually an operator.
467   unsigned RegNo = MatchRegisterName(Name);
468 
469   if (RegNo != 0) {
470     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
471     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
472   } else if (BPFOperand::isValidIdAtStart (Name))
473     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
474   else
475     return Error(NameLoc, "invalid register/token name");
476 
477   while (!getLexer().is(AsmToken::EndOfStatement)) {
478     // Attempt to parse token as operator
479     if (parseOperandAsOperator(Operands) == MatchOperand_Success)
480       continue;
481 
482     // Attempt to parse token as register
483     if (parseRegister(Operands) == MatchOperand_Success)
484       continue;
485 
486     // Attempt to parse token as an immediate
487     if (parseImmediate(Operands) != MatchOperand_Success) {
488       SMLoc Loc = getLexer().getLoc();
489       return Error(Loc, "unexpected token");
490     }
491   }
492 
493   if (getLexer().isNot(AsmToken::EndOfStatement)) {
494     SMLoc Loc = getLexer().getLoc();
495 
496     getParser().eatToEndOfStatement();
497 
498     return Error(Loc, "unexpected token");
499   }
500 
501   // Consume the EndOfStatement.
502   getParser().Lex();
503   return false;
504 }
505 
506 bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
507 
508 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
509   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
510   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
511   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
512 }
513