1 //===-- BPFAsmParser.cpp - Parse BPF assembly to MCInst instructions --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/BPFMCTargetDesc.h"
10 #include "TargetInfo/BPFTargetInfo.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/MC/MCContext.h"
14 #include "llvm/MC/MCExpr.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCParser/MCAsmLexer.h"
18 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
19 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
20 #include "llvm/MC/MCRegisterInfo.h"
21 #include "llvm/MC/MCStreamer.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/MC/TargetRegistry.h"
24 #include "llvm/Support/Casting.h"
25 
26 using namespace llvm;
27 
28 namespace {
29 struct BPFOperand;
30 
/// Target assembly parser for BPF. ParseInstruction collects the tokens of a
/// statement into an operand list, and MatchAndEmitInstruction hands that list
/// to the TableGen-generated matcher included from BPFGenAsmMatcher.inc.
class BPFAsmParser : public MCTargetAsmParser {

  /// Location of the token the parser is currently positioned on.
  SMLoc getLoc() const { return getParser().getTok().getLoc(); }

  /// Extra operand validation performed before the generated matcher runs.
  /// Returns true when the operands violate a constraint (see the definition).
  bool PreMatchCheck(OperandVector &Operands);

  /// Match the parsed operands against the instruction table and, on
  /// success, emit the resulting MCInst to \p Out.
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  /// Register parsing hooks. parseRegister is implemented on top of
  /// tryParseRegister and reports "invalid register name" when it fails.
  bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                     SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;

  /// Split one assembly statement into operands appended to \p Operands.
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  // "=" is used as the assignment operator inside assembly statements, so it
  // can't be used for symbol assignment.
  bool equalIsAsmAssignment() override { return false; }
  // "*" is used for dereferencing memory, so it can be the start of a
  // statement.
  bool starIsStartOfStatement() override { return true; }

  // Declarations generated by TableGen for the instruction matcher.
#define GET_ASSEMBLER_HEADER
#include "BPFGenAsmMatcher.inc"

  /// Operand parsers used by ParseInstruction. Each one appends to
  /// \p Operands and returns MatchOperand_Success when it recognizes the
  /// current token(s).
  OperandMatchResultTy parseImmediate(OperandVector &Operands);
  OperandMatchResultTy parseRegister(OperandVector &Operands);
  OperandMatchResultTy parseOperandAsOperator(OperandVector &Operands);

public:
  /// Target-specific match results, numbered from
  /// FIRST_TARGET_MATCH_RESULT_TY; the remaining enumerators come from the
  /// generated operand diagnostic types.
  enum BPFMatchResultTy {
    Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "BPFGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
  };

  /// Construct the parser and derive the available-feature set from the
  /// subtarget's feature bits. \p Parser is not used by this constructor.
  BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
               const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII) {
    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
  }
};
78 
79 /// BPFOperand - Instances of this class represent a parsed machine
80 /// instruction
81 struct BPFOperand : public MCParsedAsmOperand {
82 
83   enum KindTy {
84     Token,
85     Register,
86     Immediate,
87   } Kind;
88 
89   struct RegOp {
90     unsigned RegNum;
91   };
92 
93   struct ImmOp {
94     const MCExpr *Val;
95   };
96 
97   SMLoc StartLoc, EndLoc;
98   union {
99     StringRef Tok;
100     RegOp Reg;
101     ImmOp Imm;
102   };
103 
104   BPFOperand(KindTy K) : Kind(K) {}
105 
106 public:
107   BPFOperand(const BPFOperand &o) : MCParsedAsmOperand() {
108     Kind = o.Kind;
109     StartLoc = o.StartLoc;
110     EndLoc = o.EndLoc;
111 
112     switch (Kind) {
113     case Register:
114       Reg = o.Reg;
115       break;
116     case Immediate:
117       Imm = o.Imm;
118       break;
119     case Token:
120       Tok = o.Tok;
121       break;
122     }
123   }
124 
125   bool isToken() const override { return Kind == Token; }
126   bool isReg() const override { return Kind == Register; }
127   bool isImm() const override { return Kind == Immediate; }
128   bool isMem() const override { return false; }
129 
130   bool isConstantImm() const {
131     return isImm() && isa<MCConstantExpr>(getImm());
132   }
133 
134   int64_t getConstantImm() const {
135     const MCExpr *Val = getImm();
136     return static_cast<const MCConstantExpr *>(Val)->getValue();
137   }
138 
139   bool isSImm12() const {
140     return (isConstantImm() && isInt<12>(getConstantImm()));
141   }
142 
143   /// getStartLoc - Gets location of the first token of this operand
144   SMLoc getStartLoc() const override { return StartLoc; }
145   /// getEndLoc - Gets location of the last token of this operand
146   SMLoc getEndLoc() const override { return EndLoc; }
147 
148   unsigned getReg() const override {
149     assert(Kind == Register && "Invalid type access!");
150     return Reg.RegNum;
151   }
152 
153   const MCExpr *getImm() const {
154     assert(Kind == Immediate && "Invalid type access!");
155     return Imm.Val;
156   }
157 
158   StringRef getToken() const {
159     assert(Kind == Token && "Invalid type access!");
160     return Tok;
161   }
162 
163   void print(raw_ostream &OS) const override {
164     switch (Kind) {
165     case Immediate:
166       OS << *getImm();
167       break;
168     case Register:
169       OS << "<register x";
170       OS << getReg() << ">";
171       break;
172     case Token:
173       OS << "'" << getToken() << "'";
174       break;
175     }
176   }
177 
178   void addExpr(MCInst &Inst, const MCExpr *Expr) const {
179     assert(Expr && "Expr shouldn't be null!");
180 
181     if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
182       Inst.addOperand(MCOperand::createImm(CE->getValue()));
183     else
184       Inst.addOperand(MCOperand::createExpr(Expr));
185   }
186 
187   // Used by the TableGen Code
188   void addRegOperands(MCInst &Inst, unsigned N) const {
189     assert(N == 1 && "Invalid number of operands!");
190     Inst.addOperand(MCOperand::createReg(getReg()));
191   }
192 
193   void addImmOperands(MCInst &Inst, unsigned N) const {
194     assert(N == 1 && "Invalid number of operands!");
195     addExpr(Inst, getImm());
196   }
197 
198   static std::unique_ptr<BPFOperand> createToken(StringRef Str, SMLoc S) {
199     auto Op = std::make_unique<BPFOperand>(Token);
200     Op->Tok = Str;
201     Op->StartLoc = S;
202     Op->EndLoc = S;
203     return Op;
204   }
205 
206   static std::unique_ptr<BPFOperand> createReg(unsigned RegNo, SMLoc S,
207                                                SMLoc E) {
208     auto Op = std::make_unique<BPFOperand>(Register);
209     Op->Reg.RegNum = RegNo;
210     Op->StartLoc = S;
211     Op->EndLoc = E;
212     return Op;
213   }
214 
215   static std::unique_ptr<BPFOperand> createImm(const MCExpr *Val, SMLoc S,
216                                                SMLoc E) {
217     auto Op = std::make_unique<BPFOperand>(Immediate);
218     Op->Imm.Val = Val;
219     Op->StartLoc = S;
220     Op->EndLoc = E;
221     return Op;
222   }
223 
224   // Identifiers that can be used at the start of a statment.
225   static bool isValidIdAtStart(StringRef Name) {
226     return StringSwitch<bool>(Name.lower())
227         .Case("if", true)
228         .Case("call", true)
229         .Case("goto", true)
230         .Case("*", true)
231         .Case("exit", true)
232         .Case("lock", true)
233         .Case("ld_pseudo", true)
234         .Default(false);
235   }
236 
237   // Identifiers that can be used in the middle of a statment.
238   static bool isValidIdInMiddle(StringRef Name) {
239     return StringSwitch<bool>(Name.lower())
240         .Case("u64", true)
241         .Case("u32", true)
242         .Case("u16", true)
243         .Case("u8", true)
244         .Case("be64", true)
245         .Case("be32", true)
246         .Case("be16", true)
247         .Case("le64", true)
248         .Case("le32", true)
249         .Case("le16", true)
250         .Case("goto", true)
251         .Case("ll", true)
252         .Case("skb", true)
253         .Case("s", true)
254         .Case("atomic_fetch_add", true)
255         .Case("atomic_fetch_and", true)
256         .Case("atomic_fetch_or", true)
257         .Case("atomic_fetch_xor", true)
258         .Case("xchg_64", true)
259         .Case("xchg32_32", true)
260         .Case("cmpxchg_64", true)
261         .Case("cmpxchg32_32", true)
262         .Default(false);
263   }
264 };
265 } // end anonymous namespace.
266 
267 #define GET_REGISTER_MATCHER
268 #define GET_MATCHER_IMPLEMENTATION
269 #include "BPFGenAsmMatcher.inc"
270 
271 bool BPFAsmParser::PreMatchCheck(OperandVector &Operands) {
272 
273   if (Operands.size() == 4) {
274     // check "reg1 = -reg2" and "reg1 = be16/be32/be64/le16/le32/le64 reg2",
275     // reg1 must be the same as reg2
276     BPFOperand &Op0 = (BPFOperand &)*Operands[0];
277     BPFOperand &Op1 = (BPFOperand &)*Operands[1];
278     BPFOperand &Op2 = (BPFOperand &)*Operands[2];
279     BPFOperand &Op3 = (BPFOperand &)*Operands[3];
280     if (Op0.isReg() && Op1.isToken() && Op2.isToken() && Op3.isReg()
281         && Op1.getToken() == "="
282         && (Op2.getToken() == "-" || Op2.getToken() == "be16"
283             || Op2.getToken() == "be32" || Op2.getToken() == "be64"
284             || Op2.getToken() == "le16" || Op2.getToken() == "le32"
285             || Op2.getToken() == "le64")
286         && Op0.getReg() != Op3.getReg())
287       return true;
288   }
289 
290   return false;
291 }
292 
293 bool BPFAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
294                                            OperandVector &Operands,
295                                            MCStreamer &Out, uint64_t &ErrorInfo,
296                                            bool MatchingInlineAsm) {
297   MCInst Inst;
298   SMLoc ErrorLoc;
299 
300   if (PreMatchCheck(Operands))
301     return Error(IDLoc, "additional inst constraint not met");
302 
303   switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
304   default:
305     break;
306   case Match_Success:
307     Inst.setLoc(IDLoc);
308     Out.emitInstruction(Inst, getSTI());
309     return false;
310   case Match_MissingFeature:
311     return Error(IDLoc, "instruction use requires an option to be enabled");
312   case Match_MnemonicFail:
313     return Error(IDLoc, "unrecognized instruction mnemonic");
314   case Match_InvalidOperand:
315     ErrorLoc = IDLoc;
316 
317     if (ErrorInfo != ~0U) {
318       if (ErrorInfo >= Operands.size())
319         return Error(ErrorLoc, "too few operands for instruction");
320 
321       ErrorLoc = ((BPFOperand &)*Operands[ErrorInfo]).getStartLoc();
322 
323       if (ErrorLoc == SMLoc())
324         ErrorLoc = IDLoc;
325     }
326 
327     return Error(ErrorLoc, "invalid operand for instruction");
328   }
329 
330   llvm_unreachable("Unknown match type detected!");
331 }
332 
333 bool BPFAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
334                                  SMLoc &EndLoc) {
335   if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
336     return Error(StartLoc, "invalid register name");
337   return false;
338 }
339 
340 OperandMatchResultTy BPFAsmParser::tryParseRegister(MCRegister &RegNo,
341                                                     SMLoc &StartLoc,
342                                                     SMLoc &EndLoc) {
343   const AsmToken &Tok = getParser().getTok();
344   StartLoc = Tok.getLoc();
345   EndLoc = Tok.getEndLoc();
346   RegNo = 0;
347   StringRef Name = getLexer().getTok().getIdentifier();
348 
349   if (!MatchRegisterName(Name)) {
350     getParser().Lex(); // Eat identifier token.
351     return MatchOperand_Success;
352   }
353 
354   return MatchOperand_NoMatch;
355 }
356 
357 OperandMatchResultTy
358 BPFAsmParser::parseOperandAsOperator(OperandVector &Operands) {
359   SMLoc S = getLoc();
360 
361   if (getLexer().getKind() == AsmToken::Identifier) {
362     StringRef Name = getLexer().getTok().getIdentifier();
363 
364     if (BPFOperand::isValidIdInMiddle(Name)) {
365       getLexer().Lex();
366       Operands.push_back(BPFOperand::createToken(Name, S));
367       return MatchOperand_Success;
368     }
369 
370     return MatchOperand_NoMatch;
371   }
372 
373   switch (getLexer().getKind()) {
374   case AsmToken::Minus:
375   case AsmToken::Plus: {
376     if (getLexer().peekTok().is(AsmToken::Integer))
377       return MatchOperand_NoMatch;
378     [[fallthrough]];
379   }
380 
381   case AsmToken::Equal:
382   case AsmToken::Greater:
383   case AsmToken::Less:
384   case AsmToken::Pipe:
385   case AsmToken::Star:
386   case AsmToken::LParen:
387   case AsmToken::RParen:
388   case AsmToken::LBrac:
389   case AsmToken::RBrac:
390   case AsmToken::Slash:
391   case AsmToken::Amp:
392   case AsmToken::Percent:
393   case AsmToken::Caret: {
394     StringRef Name = getLexer().getTok().getString();
395     getLexer().Lex();
396     Operands.push_back(BPFOperand::createToken(Name, S));
397 
398     return MatchOperand_Success;
399   }
400 
401   case AsmToken::EqualEqual:
402   case AsmToken::ExclaimEqual:
403   case AsmToken::GreaterEqual:
404   case AsmToken::GreaterGreater:
405   case AsmToken::LessEqual:
406   case AsmToken::LessLess: {
407     Operands.push_back(BPFOperand::createToken(
408         getLexer().getTok().getString().substr(0, 1), S));
409     Operands.push_back(BPFOperand::createToken(
410         getLexer().getTok().getString().substr(1, 1), S));
411     getLexer().Lex();
412 
413     return MatchOperand_Success;
414   }
415 
416   default:
417     break;
418   }
419 
420   return MatchOperand_NoMatch;
421 }
422 
423 OperandMatchResultTy BPFAsmParser::parseRegister(OperandVector &Operands) {
424   SMLoc S = getLoc();
425   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
426 
427   switch (getLexer().getKind()) {
428   default:
429     return MatchOperand_NoMatch;
430   case AsmToken::Identifier:
431     StringRef Name = getLexer().getTok().getIdentifier();
432     unsigned RegNo = MatchRegisterName(Name);
433 
434     if (RegNo == 0)
435       return MatchOperand_NoMatch;
436 
437     getLexer().Lex();
438     Operands.push_back(BPFOperand::createReg(RegNo, S, E));
439   }
440   return MatchOperand_Success;
441 }
442 
443 OperandMatchResultTy BPFAsmParser::parseImmediate(OperandVector &Operands) {
444   switch (getLexer().getKind()) {
445   default:
446     return MatchOperand_NoMatch;
447   case AsmToken::LParen:
448   case AsmToken::Minus:
449   case AsmToken::Plus:
450   case AsmToken::Integer:
451   case AsmToken::String:
452   case AsmToken::Identifier:
453     break;
454   }
455 
456   const MCExpr *IdVal;
457   SMLoc S = getLoc();
458 
459   if (getParser().parseExpression(IdVal))
460     return MatchOperand_ParseFail;
461 
462   SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
463   Operands.push_back(BPFOperand::createImm(IdVal, S, E));
464 
465   return MatchOperand_Success;
466 }
467 
468 /// ParseInstruction - Parse an BPF instruction which is in BPF verifier
469 /// format.
470 bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
471                                     SMLoc NameLoc, OperandVector &Operands) {
472   // The first operand could be either register or actually an operator.
473   unsigned RegNo = MatchRegisterName(Name);
474 
475   if (RegNo != 0) {
476     SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() - 1);
477     Operands.push_back(BPFOperand::createReg(RegNo, NameLoc, E));
478   } else if (BPFOperand::isValidIdAtStart (Name))
479     Operands.push_back(BPFOperand::createToken(Name, NameLoc));
480   else
481     return Error(NameLoc, "invalid register/token name");
482 
483   while (!getLexer().is(AsmToken::EndOfStatement)) {
484     // Attempt to parse token as operator
485     if (parseOperandAsOperator(Operands) == MatchOperand_Success)
486       continue;
487 
488     // Attempt to parse token as register
489     if (parseRegister(Operands) == MatchOperand_Success)
490       continue;
491 
492     if (getLexer().is(AsmToken::Comma)) {
493       getLexer().Lex();
494       continue;
495     }
496 
497     // Attempt to parse token as an immediate
498     if (parseImmediate(Operands) != MatchOperand_Success) {
499       SMLoc Loc = getLexer().getLoc();
500       return Error(Loc, "unexpected token");
501     }
502   }
503 
504   if (getLexer().isNot(AsmToken::EndOfStatement)) {
505     SMLoc Loc = getLexer().getLoc();
506 
507     getParser().eatToEndOfStatement();
508 
509     return Error(Loc, "unexpected token");
510   }
511 
512   // Consume the EndOfStatement.
513   getParser().Lex();
514   return false;
515 }
516 
517 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
518   RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
519   RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
520   RegisterMCAsmParser<BPFAsmParser> Z(getTheBPFbeTarget());
521 }
522