1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCParser/MCAsmLexer.h"
18 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/TargetRegistry.h"
24 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 struct BPFOperand;
30 
31 class BPFAsmParser : public MCTargetAsmParser {
32 
33   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
34 
35   bool PreMatchCheck(OperandVector &Operands);
36 
37   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
38                                OperandVector &Operands, MCStreamer &Out,
39                                uint64_t &ErrorInfo,
40                                bool MatchingInlineAsm) override;
41 
42   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
43   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
44                                         SMLoc &EndLoc) override;
45 
46   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
47                         SMLoc NameLoc, OperandVector &Operands) override;
48 
49   bool ParseDirective(AsmToken DirectiveID) override;
50 
51   // "=" is used as assignment operator for assembly statment, so can't be used
52   // for symbol assignment.
53   bool equalIsAsmAssignment() override { return false; }
54   // "*" is used for dereferencing memory that it will be the start of
55   // statement.
56   bool starIsStartOfStatement() override { return true; }
57 
58 #define GET_ASSEMBLER_HEADER
59 #include "BPFGenAsmMatcher.inc"
60 
61   OperandMatchResultTy parseImmediate(OperandVector &Operands);
62   OperandMatchResultTy parseRegister(OperandVector &Operands);
63   OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);
64 
65 public:
66   enum BPFMatchResultTy {
67     Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
68 #define GET_OPERAND_DIAGNOSTIC_TYPES
69 #include "BPFGenAsmMatcher.inc"
70 #undef GET_OPERAND_DIAGNOSTIC_TYPES
71   };
72 
73   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
74                const MCInstrInfo &MII, const MCTargetOptions &Options)
75       : MCTargetAsmParser(Options, STI, MII) {
76     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
77   }
78 };
79 
80 /// BPFOperand - Instances of this class represent a parsed machine
81 /// instruction
82 struct BPFOperand : public MCParsedAsmOperand {
83 
84   enum KindTy {
85     Token,
86     Register,
87     Immediate,
88   } Kind;
89 
90   struct RegOp {
91     unsigned RegNum;
92   };
93 
94   struct ImmOp {
95     const MCExpr *Val;
96   };
97 
98   SMLoc StartLoc, EndLoc;
99   union {
100     StringRef Tok;
101     RegOp Reg;
102     ImmOp Imm;
103   };
104 
105   BPFOperand(KindTy K) : Kind(K) {}
106 
107 public:
108   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
109     Kind = o.Kind;
110     StartLoc = o.StartLoc;
111     EndLoc = o.EndLoc;
112 
113     switch (Kind) {
114     case Register:
115       Reg = o.Reg;
116       break;
117     case Immediate:
118       Imm = o.Imm;
119       break;
120     case Token:
121       Tok = o.Tok;
122       break;
123     }
124   }
125 
126   bool isToken() const override { return Kind == Token; }
127   bool isReg() const override { return Kind == Register; }
128   bool isImm() const override { return Kind == Immediate; }
129   bool isMem() const override { return false; }
130 
131   bool isConstantImm() const {
132     return isImm() && isa<MCConstantExpr>(getImm());
133   }
134 
135   int64_t getConstantImm() const {
136     const MCExpr *Val = getImm();
137     return static_cast<const MCConstantExpr *>(Val)->getValue();
138   }
139 
140   bool isSImm12() const {
141     return (isConstantImm() && isInt<12>(getConstantImm()));
142   }
143 
144   /// getStartLoc - Gets location of the first token of this operand
145   SMLoc getStartLoc() const override { return StartLoc; }
146   /// getEndLoc - Gets location of the last token of this operand
147   SMLoc getEndLoc() const override { return EndLoc; }
148 
149   unsigned getReg() const override {
150     assert(Kind == Register && "Invalid type access!");
151     return Reg.RegNum;
152   }
153 
154   const MCExpr *getImm() const {
155     assert(Kind == Immediate && "Invalid type access!");
156     return Imm.Val;
157   }
158 
159   StringRef getToken() const {
160     assert(Kind == Token && "Invalid type access!");
161     return Tok;
162   }
163 
164   void print(raw_ostream &OS) const override {
165     switch (Kind) {
166     case Immediate:
167       OS << *getImm();
168       break;
169     case Register:
170       OS << "<register x";
171       OS << getReg() << ">";
172       break;
173     case Token:
174       OS << "'" << getToken() << "'";
175       break;
176     }
177   }
178 
179   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
180     assert(Expr && "Expr shouldn't be null!");
181 
182     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
183       Inst.addOperand(MCOperand::createImm(CE->getValue()));
184     else
185       Inst.addOperand(MCOperand::createExpr(Expr));
186   }
187 
188   // Used by the TableGen Code
189   void addRegOperands(MCInst &Inst, unsigned N) const {
190     assert(N == 1 && "Invalid number of operands!");
191     Inst.addOperand(MCOperand::createReg(getReg()));
192   }
193 
194   void addImmOperands(MCInst &Inst, unsigned N) const {
195     assert(N == 1 && "Invalid number of operands!");
196     addExpr(Inst, getImm());
197   }
198 
199   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
200     auto Op = std::make_unique<BPFOperand>(Token);
201     Op->Tok = Str;
202     Op->StartLoc = S;
203     Op->EndLoc = S;
204     return Op;
205   }
206 
207   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
208                                                SMLoc E) {
209     auto Op = std::make_unique<BPFOperand>(Register);
210     Op->Reg.RegNum = RegNo;
211     Op->StartLoc = S;
212     Op->EndLoc = E;
213     return Op;
214   }
215 
216   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
217                                                SMLoc E) {
218     auto Op = std::make_unique<BPFOperand>(Immediate);
219     Op->Imm.Val = Val;
220     Op->StartLoc = S;
221     Op->EndLoc = E;
222     return Op;
223   }
224 
225   // Identifiers that can be used at the start of a statment.
226   static bool isValidIdAtStart(StringRef Name) {
227     return StringSwitch<bool>(Name.lower())
228         .Case("if", true)
229         .Case("call", true)
230         .Case("goto", true)
231         .Case("*", true)
232         .Case("exit", true)
233         .Case("lock", true)
234         .Case("ld_pseudo", true)
235         .Default(false);
236   }
237 
238   // Identifiers that can be used in the middle of a statment.
239   static bool isValidIdInMiddle(StringRef Name) {
240     return StringSwitch<bool>(Name.lower())
241         .Case("u64", true)
242         .Case("u32", true)
243         .Case("u16", true)
244         .Case("u8", true)
245         .Case("be64", true)
246         .Case("be32", true)
247         .Case("be16", true)
248         .Case("le64", true)
249         .Case("le32", true)
250         .Case("le16", true)
251         .Case("goto", true)
252         .Case("ll", true)
253         .Case("skb", true)
254         .Case("s", true)
255         .Default(false);
256   }
257 };
258 } // end anonymous namespace.
259 
260 #define GET_REGISTER_MATCHER
261 #define GET_MATCHER_IMPLEMENTATION
262 #include "BPFGenAsmMatcher.inc"
263 
264 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
265 
266   if (Operands.size() == 4) {
267     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
268     // reg1 must be the same as reg2
269     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
270     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
271     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
272     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
273     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
274         && Op1.getToken() == "="
275         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
276             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
277             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
278             || Op2.getToken() == "le64")
279         && Op0.getReg() != Op3.getReg())
280       return true;
281   }
282 
283   return false;
284 }
285 
286 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
287                                            OperandVector &Operands,
288                                            MCStreamer &Out, uint64_t &ErrorInfo,
289                                            bool MatchingInlineAsm) {
290   MCInst Inst;
291   SMLoc ErrorLoc;
292 
293   if (PreMatchCheck(Operands))
294     return Error(IDLoc, "additional inst constraint not met");
295 
296   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
297   default:
298     break;
299   case Match_Success:
300     Inst.setLoc(IDLoc);
301     Out.emitInstruction(Inst, getSTI());
302     return false;
303   case Match_MissingFeature:
304     return Error(IDLoc, "instruction use requires an option to be enabled");
305   case Match_MnemonicFail:
306     return Error(IDLoc, "unrecognized instruction mnemonic");
307   case Match_InvalidOperand:
308     ErrorLoc = IDLoc;
309 
310     if (ErrorInfo != ~0U) {
311       if (ErrorInfo >= Operands.size())
312         return Error(ErrorLoc, "too few operands for instruction");
313 
314       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
315 
316       if (ErrorLoc == SMLoc())
317         ErrorLoc = IDLoc;
318     }
319 
320     return Error(ErrorLoc, "invalid operand for instruction");
321   }
322 
323   llvm_unreachable("Unknown match type detected!");
324 }
325 
326 bool BPFAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
327                                  SMLoc &EndLoc) {
328   if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
329     return Error(StartLoc, "invalid register name");
330   return false;
331 }
332 
333 OperandMatchResultTy BPFAsmParser::tryParseRegister(unsigned &RegNo,
334                                                     SMLoc &StartLoc,
335                                                     SMLoc &EndLoc) {
336   const AsmToken &Tok = getParser().getTok();
337   StartLoc = Tok.getLoc();
338   EndLoc = Tok.getEndLoc();
339   RegNo = 0;
340   StringRef Name = getLexer().getTok().getIdentifier();
341 
342   if (!MatchRegisterName(Name)) {
343     getParser().Lex(); // Eat identifier token.
344     return MatchOperand_Success;
345   }
346 
347   return MatchOperand_NoMatch;
348 }
349 
350 OperandMatchResultTy
351 BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
352   SMLoc S = getLoc();
353 
354   if (getLexer().getKind() == AsmToken::Identifier) {
355     StringRef Name = getLexer().getTok().getIdentifier();
356 
357     if (BPFOperand::isValidIdInMiddle(Name)) {
358       getLexer().Lex();
359       Operands.push_back(BPFOperand::createToken(Name, S));
360       return MatchOperand_Success;
361     }
362 
363     return MatchOperand_NoMatch;
364   }
365 
366   switch (getLexer().getKind()) {
367   case AsmToken::Minus:
368   case AsmToken::Plus: {
369     if (getLexer().peekTok().is(AsmToken::Integer))
370       return MatchOperand_NoMatch;
371     LLVM_FALLTHROUGH;
372   }
373 
374   case AsmToken::Equal:
375   case AsmToken::Greater:
376   case AsmToken::Less:
377   case AsmToken::Pipe:
378   case AsmToken::Star:
379   case AsmToken::LParen:
380   case AsmToken::RParen:
381   case AsmToken::LBrac:
382   case AsmToken::RBrac:
383   case AsmToken::Slash:
384   case AsmToken::Amp:
385   case AsmToken::Percent:
386   case AsmToken::Caret: {
387     StringRef Name = getLexer().getTok().getString();
388     getLexer().Lex();
389     Operands.push_back(BPFOperand::createToken(Name, S));
390 
391     return MatchOperand_Success;
392   }
393 
394   case AsmToken::EqualEqual:
395   case AsmToken::ExclaimEqual:
396   case AsmToken::GreaterEqual:
397   case AsmToken::GreaterGreater:
398   case AsmToken::LessEqual:
399   case AsmToken::LessLess: {
400     Operands.push_back(BPFOperand::createToken(
401         getLexer().getTok().getString().substr(0, 1), S));
402     Operands.push_back(BPFOperand::createToken(
403         getLexer().getTok().getString().substr(1, 1), S));
404     getLexer().Lex();
405 
406     return MatchOperand_Success;
407   }
408 
409   default:
410     break;
411   }
412 
413   return MatchOperand_NoMatch;
414 }
415 
416 OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
417   SMLoc S = getLoc();
418   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
419 
420   switch (getLexer().getKind()) {
421   default:
422     return MatchOperand_NoMatch;
423   case AsmToken::Identifier:
424     StringRef Name = getLexer().getTok().getIdentifier();
425     unsigned RegNo = MatchRegisterName(Name);
426 
427     if (RegNo == 0)
428       return MatchOperand_NoMatch;
429 
430     getLexer().Lex();
431     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
432   }
433   return MatchOperand_Success;
434 }
435 
436 OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
437   switch (getLexer().getKind()) {
438   default:
439     return MatchOperand_NoMatch;
440   case AsmToken::LParen:
441   case AsmToken::Minus:
442   case AsmToken::Plus:
443   case AsmToken::Integer:
444   case AsmToken::String:
445   case AsmToken::Identifier:
446     break;
447   }
448 
449   const MCExpr *IdVal;
450   SMLoc S = getLoc();
451 
452   if (getParser().parseExpression(IdVal))
453     return MatchOperand_ParseFail;
454 
455   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
456   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
457 
458   return MatchOperand_Success;
459 }
460 
461 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
462 /// format.
463 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
464                                     SMLoc NameLoc, OperandVector &Operands) {
465   // The first operand could be either register or actually an operator.
466   unsigned RegNo = MatchRegisterName(Name);
467 
468   if (RegNo != 0) {
469     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
470     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
471   } else if (BPFOperand::isValidIdAtStart (Name))
472     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
473   else
474     return Error(NameLoc, "invalid register/token name");
475 
476   while (!getLexer().is(AsmToken::EndOfStatement)) {
477     // Attempt to parse token as operator
478     if (parseOperandAsOperator(Operands) == MatchOperand_Success)
479       continue;
480 
481     // Attempt to parse token as register
482     if (parseRegister(Operands) == MatchOperand_Success)
483       continue;
484 
485     // Attempt to parse token as an immediate
486     if (parseImmediate(Operands) != MatchOperand_Success) {
487       SMLoc Loc = getLexer().getLoc();
488       return Error(Loc, "unexpected token");
489     }
490   }
491 
492   if (getLexer().isNot(AsmToken::EndOfStatement)) {
493     SMLoc Loc = getLexer().getLoc();
494 
495     getParser().eatToEndOfStatement();
496 
497     return Error(Loc, "unexpected token");
498   }
499 
500   // Consume the EndOfStatement.
501   getParser().Lex();
502   return false;
503 }
504 
505 bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
506 
507 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
508   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
509   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
510   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
511 }
512