1 //===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the private interfaces of FileCheck. Its purpose is to
10 // allow unit testing of FileCheck and to separate the interface from the
11 // implementation. It is only meant to be used by FileCheck.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_FILECHECK_FILECHECKIMPL_H
16 #define LLVM_LIB_FILECHECK_FILECHECKIMPL_H
17 
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/FileCheck/FileCheck.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SourceMgr.h"
#include <map>
#include <optional>
#include <string>
#include <utility>
#include <vector>
28 
29 namespace llvm {
30 
31 //===----------------------------------------------------------------------===//
32 // Numeric substitution handling code.
33 //===----------------------------------------------------------------------===//
34 
35 /// Type representing the format an expression value should be textualized into
36 /// for matching. Used to represent both explicit format specifiers as well as
37 /// implicit format from using numeric variables.
38 struct ExpressionFormat {
39   enum class Kind {
40     /// Denote absence of format. Used for implicit format of literals and
41     /// empty expressions.
42     NoFormat,
43     /// Value is an unsigned integer and should be printed as a decimal number.
44     Unsigned,
45     /// Value is a signed integer and should be printed as a decimal number.
46     Signed,
47     /// Value should be printed as an uppercase hex number.
48     HexUpper,
49     /// Value should be printed as a lowercase hex number.
50     HexLower
51   };
52 
53 private:
54   Kind Value;
55   unsigned Precision = 0;
56   /// printf-like "alternate form" selected.
57   bool AlternateForm = false;
58 
59 public:
60   /// Evaluates a format to true if it can be used in a match.
61   explicit operator bool() const { return Value != Kind::NoFormat; }
62 
63   /// Define format equality: formats are equal if neither is NoFormat and
64   /// their kinds and precision are the same.
65   bool operator==(const ExpressionFormat &Other) const {
66     return Value != Kind::NoFormat && Value == Other.Value &&
67            Precision == Other.Precision && AlternateForm == Other.AlternateForm;
68   }
69 
70   bool operator!=(const ExpressionFormat &Other) const {
71     return !(*this == Other);
72   }
73 
74   bool operator==(Kind OtherValue) const { return Value == OtherValue; }
75 
76   bool operator!=(Kind OtherValue) const { return !(*this == OtherValue); }
77 
78   /// \returns the format specifier corresponding to this format as a string.
79   StringRef toString() const;
80 
ExpressionFormatExpressionFormat81   ExpressionFormat() : Value(Kind::NoFormat){};
ExpressionFormatExpressionFormat82   explicit ExpressionFormat(Kind Value) : Value(Value), Precision(0){};
ExpressionFormatExpressionFormat83   explicit ExpressionFormat(Kind Value, unsigned Precision)
84       : Value(Value), Precision(Precision){};
ExpressionFormatExpressionFormat85   explicit ExpressionFormat(Kind Value, unsigned Precision, bool AlternateForm)
86       : Value(Value), Precision(Precision), AlternateForm(AlternateForm){};
87 
88   /// \returns a wildcard regular expression string that matches any value in
89   /// the format represented by this instance and no other value, or an error
90   /// if the format is NoFormat.
91   Expected<std::string> getWildcardRegex() const;
92 
93   /// \returns the string representation of \p Value in the format represented
94   /// by this instance, or an error if conversion to this format failed or the
95   /// format is NoFormat.
96   Expected<std::string> getMatchingString(APInt Value) const;
97 
98   /// \returns the value corresponding to string representation \p StrVal
99   /// according to the matching format represented by this instance.
100   APInt valueFromStringRepr(StringRef StrVal, const SourceMgr &SM) const;
101 };
102 
103 /// Class to represent an overflow error that might result when manipulating a
104 /// value.
105 class OverflowError : public ErrorInfo<OverflowError> {
106 public:
107   static char ID;
108 
convertToErrorCode()109   std::error_code convertToErrorCode() const override {
110     return std::make_error_code(std::errc::value_too_large);
111   }
112 
log(raw_ostream & OS)113   void log(raw_ostream &OS) const override { OS << "overflow error"; }
114 };
115 
116 /// Performs operation and \returns its result or an error in case of failure,
117 /// such as if an overflow occurs.
118 Expected<APInt> exprAdd(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
119 Expected<APInt> exprSub(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
120 Expected<APInt> exprMul(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
121 Expected<APInt> exprDiv(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
122 Expected<APInt> exprMax(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
123 Expected<APInt> exprMin(const APInt &Lhs, const APInt &Rhs, bool &Overflow);
124 
125 /// Base class representing the AST of a given expression.
126 class ExpressionAST {
127 private:
128   StringRef ExpressionStr;
129 
130 public:
ExpressionAST(StringRef ExpressionStr)131   ExpressionAST(StringRef ExpressionStr) : ExpressionStr(ExpressionStr) {}
132 
133   virtual ~ExpressionAST() = default;
134 
getExpressionStr()135   StringRef getExpressionStr() const { return ExpressionStr; }
136 
137   /// Evaluates and \returns the value of the expression represented by this
138   /// AST or an error if evaluation fails.
139   virtual Expected<APInt> eval() const = 0;
140 
141   /// \returns either the implicit format of this AST, a diagnostic against
142   /// \p SM if implicit formats of the AST's components conflict, or NoFormat
143   /// if the AST has no implicit format (e.g. AST is made up of a single
144   /// literal).
145   virtual Expected<ExpressionFormat>
getImplicitFormat(const SourceMgr & SM)146   getImplicitFormat(const SourceMgr &SM) const {
147     return ExpressionFormat();
148   }
149 };
150 
151 /// Class representing an unsigned literal in the AST of an expression.
152 class ExpressionLiteral : public ExpressionAST {
153 private:
154   /// Actual value of the literal.
155   APInt Value;
156 
157 public:
ExpressionLiteral(StringRef ExpressionStr,APInt Val)158   explicit ExpressionLiteral(StringRef ExpressionStr, APInt Val)
159       : ExpressionAST(ExpressionStr), Value(Val) {}
160 
161   /// \returns the literal's value.
eval()162   Expected<APInt> eval() const override { return Value; }
163 };
164 
165 /// Class to represent an undefined variable error, which quotes that
166 /// variable's name when printed.
167 class UndefVarError : public ErrorInfo<UndefVarError> {
168 private:
169   StringRef VarName;
170 
171 public:
172   static char ID;
173 
UndefVarError(StringRef VarName)174   UndefVarError(StringRef VarName) : VarName(VarName) {}
175 
getVarName()176   StringRef getVarName() const { return VarName; }
177 
convertToErrorCode()178   std::error_code convertToErrorCode() const override {
179     return inconvertibleErrorCode();
180   }
181 
182   /// Print name of variable associated with this error.
log(raw_ostream & OS)183   void log(raw_ostream &OS) const override {
184     OS << "undefined variable: " << VarName;
185   }
186 };
187 
188 /// Class representing an expression and its matching format.
189 class Expression {
190 private:
191   /// Pointer to AST of the expression.
192   std::unique_ptr<ExpressionAST> AST;
193 
194   /// Format to use (e.g. hex upper case letters) when matching the value.
195   ExpressionFormat Format;
196 
197 public:
198   /// Generic constructor for an expression represented by the given \p AST and
199   /// whose matching format is \p Format.
Expression(std::unique_ptr<ExpressionAST> AST,ExpressionFormat Format)200   Expression(std::unique_ptr<ExpressionAST> AST, ExpressionFormat Format)
201       : AST(std::move(AST)), Format(Format) {}
202 
203   /// \returns pointer to AST of the expression. Pointer is guaranteed to be
204   /// valid as long as this object is.
getAST()205   ExpressionAST *getAST() const { return AST.get(); }
206 
getFormat()207   ExpressionFormat getFormat() const { return Format; }
208 };
209 
210 /// Class representing a numeric variable and its associated current value.
211 class NumericVariable {
212 private:
213   /// Name of the numeric variable.
214   StringRef Name;
215 
216   /// Format to use for expressions using this variable without an explicit
217   /// format.
218   ExpressionFormat ImplicitFormat;
219 
220   /// Value of numeric variable, if defined, or std::nullopt otherwise.
221   std::optional<APInt> Value;
222 
223   /// The input buffer's string from which Value was parsed, or std::nullopt.
224   /// See comments on getStringValue for a discussion of the std::nullopt case.
225   std::optional<StringRef> StrValue;
226 
227   /// Line number where this variable is defined, or std::nullopt if defined
228   /// before input is parsed. Used to determine whether a variable is defined on
229   /// the same line as a given use.
230   std::optional<size_t> DefLineNumber;
231 
232 public:
233   /// Constructor for a variable \p Name with implicit format \p ImplicitFormat
234   /// defined at line \p DefLineNumber or defined before input is parsed if
235   /// \p DefLineNumber is std::nullopt.
236   explicit NumericVariable(StringRef Name, ExpressionFormat ImplicitFormat,
237                            std::optional<size_t> DefLineNumber = std::nullopt)
Name(Name)238       : Name(Name), ImplicitFormat(ImplicitFormat),
239         DefLineNumber(DefLineNumber) {}
240 
241   /// \returns name of this numeric variable.
getName()242   StringRef getName() const { return Name; }
243 
244   /// \returns implicit format of this numeric variable.
getImplicitFormat()245   ExpressionFormat getImplicitFormat() const { return ImplicitFormat; }
246 
247   /// \returns this variable's value.
getValue()248   std::optional<APInt> getValue() const { return Value; }
249 
250   /// \returns the input buffer's string from which this variable's value was
251   /// parsed, or std::nullopt if the value is not yet defined or was not parsed
252   /// from the input buffer.  For example, the value of @LINE is not parsed from
253   /// the input buffer, and some numeric variables are parsed from the command
254   /// line instead.
getStringValue()255   std::optional<StringRef> getStringValue() const { return StrValue; }
256 
257   /// Sets value of this numeric variable to \p NewValue, and sets the input
258   /// buffer string from which it was parsed to \p NewStrValue.  See comments on
259   /// getStringValue for a discussion of when the latter can be std::nullopt.
260   void setValue(APInt NewValue,
261                 std::optional<StringRef> NewStrValue = std::nullopt) {
262     Value = NewValue;
263     StrValue = NewStrValue;
264   }
265 
266   /// Clears value of this numeric variable, regardless of whether it is
267   /// currently defined or not.
clearValue()268   void clearValue() {
269     Value = std::nullopt;
270     StrValue = std::nullopt;
271   }
272 
273   /// \returns the line number where this variable is defined, if any, or
274   /// std::nullopt if defined before input is parsed.
getDefLineNumber()275   std::optional<size_t> getDefLineNumber() const { return DefLineNumber; }
276 };
277 
278 /// Class representing the use of a numeric variable in the AST of an
279 /// expression.
280 class NumericVariableUse : public ExpressionAST {
281 private:
282   /// Pointer to the class instance for the variable this use is about.
283   NumericVariable *Variable;
284 
285 public:
NumericVariableUse(StringRef Name,NumericVariable * Variable)286   NumericVariableUse(StringRef Name, NumericVariable *Variable)
287       : ExpressionAST(Name), Variable(Variable) {}
288   /// \returns the value of the variable referenced by this instance.
289   Expected<APInt> eval() const override;
290 
291   /// \returns implicit format of this numeric variable.
292   Expected<ExpressionFormat>
getImplicitFormat(const SourceMgr & SM)293   getImplicitFormat(const SourceMgr &SM) const override {
294     return Variable->getImplicitFormat();
295   }
296 };
297 
298 /// Type of functions evaluating a given binary operation.
299 using binop_eval_t = Expected<APInt> (*)(const APInt &, const APInt &, bool &);
300 
301 /// Class representing a single binary operation in the AST of an expression.
302 class BinaryOperation : public ExpressionAST {
303 private:
304   /// Left operand.
305   std::unique_ptr<ExpressionAST> LeftOperand;
306 
307   /// Right operand.
308   std::unique_ptr<ExpressionAST> RightOperand;
309 
310   /// Pointer to function that can evaluate this binary operation.
311   binop_eval_t EvalBinop;
312 
313 public:
BinaryOperation(StringRef ExpressionStr,binop_eval_t EvalBinop,std::unique_ptr<ExpressionAST> LeftOp,std::unique_ptr<ExpressionAST> RightOp)314   BinaryOperation(StringRef ExpressionStr, binop_eval_t EvalBinop,
315                   std::unique_ptr<ExpressionAST> LeftOp,
316                   std::unique_ptr<ExpressionAST> RightOp)
317       : ExpressionAST(ExpressionStr), EvalBinop(EvalBinop) {
318     LeftOperand = std::move(LeftOp);
319     RightOperand = std::move(RightOp);
320   }
321 
322   /// Evaluates the value of the binary operation represented by this AST,
323   /// using EvalBinop on the result of recursively evaluating the operands.
324   /// \returns the expression value or an error if an undefined numeric
325   /// variable is used in one of the operands.
326   Expected<APInt> eval() const override;
327 
328   /// \returns the implicit format of this AST, if any, a diagnostic against
329   /// \p SM if the implicit formats of the AST's components conflict, or no
330   /// format if the AST has no implicit format (e.g. AST is made of a single
331   /// literal).
332   Expected<ExpressionFormat>
333   getImplicitFormat(const SourceMgr &SM) const override;
334 };
335 
336 class FileCheckPatternContext;
337 
338 /// Class representing a substitution to perform in the RegExStr string.
339 class Substitution {
340 protected:
341   /// Pointer to a class instance holding, among other things, the table with
342   /// the values of live string variables at the start of any given CHECK line.
343   /// Used for substituting string variables with the text they were defined
344   /// as. Expressions are linked to the numeric variables they use at
345   /// parse time and directly access the value of the numeric variable to
346   /// evaluate their value.
347   FileCheckPatternContext *Context;
348 
349   /// The string that needs to be substituted for something else. For a
350   /// string variable this is its name, otherwise this is the whole expression.
351   StringRef FromStr;
352 
353   // Index in RegExStr of where to do the substitution.
354   size_t InsertIdx;
355 
356 public:
Substitution(FileCheckPatternContext * Context,StringRef VarName,size_t InsertIdx)357   Substitution(FileCheckPatternContext *Context, StringRef VarName,
358                size_t InsertIdx)
359       : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
360 
361   virtual ~Substitution() = default;
362 
363   /// \returns the string to be substituted for something else.
getFromString()364   StringRef getFromString() const { return FromStr; }
365 
366   /// \returns the index where the substitution is to be performed in RegExStr.
getIndex()367   size_t getIndex() const { return InsertIdx; }
368 
369   /// \returns a string containing the result of the substitution represented
370   /// by this class instance or an error if substitution failed.
371   virtual Expected<std::string> getResult() const = 0;
372 };
373 
374 class StringSubstitution : public Substitution {
375 public:
StringSubstitution(FileCheckPatternContext * Context,StringRef VarName,size_t InsertIdx)376   StringSubstitution(FileCheckPatternContext *Context, StringRef VarName,
377                      size_t InsertIdx)
378       : Substitution(Context, VarName, InsertIdx) {}
379 
380   /// \returns the text that the string variable in this substitution matched
381   /// when defined, or an error if the variable is undefined.
382   Expected<std::string> getResult() const override;
383 };
384 
385 class NumericSubstitution : public Substitution {
386 private:
387   /// Pointer to the class representing the expression whose value is to be
388   /// substituted.
389   std::unique_ptr<Expression> ExpressionPointer;
390 
391 public:
NumericSubstitution(FileCheckPatternContext * Context,StringRef ExpressionStr,std::unique_ptr<Expression> ExpressionPointer,size_t InsertIdx)392   NumericSubstitution(FileCheckPatternContext *Context, StringRef ExpressionStr,
393                       std::unique_ptr<Expression> ExpressionPointer,
394                       size_t InsertIdx)
395       : Substitution(Context, ExpressionStr, InsertIdx),
396         ExpressionPointer(std::move(ExpressionPointer)) {}
397 
398   /// \returns a string containing the result of evaluating the expression in
399   /// this substitution, or an error if evaluation failed.
400   Expected<std::string> getResult() const override;
401 };
402 
403 //===----------------------------------------------------------------------===//
404 // Pattern handling code.
405 //===----------------------------------------------------------------------===//
406 
407 /// Class holding the Pattern global state, shared by all patterns: tables
408 /// holding values of variables and whether they are defined or not at any
409 /// given time in the matching process.
410 class FileCheckPatternContext {
411   friend class Pattern;
412 
413 private:
414   /// When matching a given pattern, this holds the value of all the string
415   /// variables defined in previous patterns. In a pattern, only the last
416   /// definition for a given variable is recorded in this table.
417   /// Back-references are used for uses after any the other definition.
418   StringMap<StringRef> GlobalVariableTable;
419 
420   /// Map of all string variables defined so far. Used at parse time to detect
421   /// a name conflict between a numeric variable and a string variable when
422   /// the former is defined on a later line than the latter.
423   StringMap<bool> DefinedVariableTable;
424 
425   /// When matching a given pattern, this holds the pointers to the classes
426   /// representing the numeric variables defined in previous patterns. When
427   /// matching a pattern all definitions for that pattern are recorded in the
428   /// NumericVariableDefs table in the Pattern instance of that pattern.
429   StringMap<NumericVariable *> GlobalNumericVariableTable;
430 
431   /// Pointer to the class instance representing the @LINE pseudo variable for
432   /// easily updating its value.
433   NumericVariable *LineVariable = nullptr;
434 
435   /// Vector holding pointers to all parsed numeric variables. Used to
436   /// automatically free them once they are guaranteed to no longer be used.
437   std::vector<std::unique_ptr<NumericVariable>> NumericVariables;
438 
439   /// Vector holding pointers to all parsed expressions. Used to automatically
440   /// free the expressions once they are guaranteed to no longer be used.
441   std::vector<std::unique_ptr<Expression>> Expressions;
442 
443   /// Vector holding pointers to all substitutions. Used to automatically free
444   /// them once they are guaranteed to no longer be used.
445   std::vector<std::unique_ptr<Substitution>> Substitutions;
446 
447 public:
448   /// \returns the value of string variable \p VarName or an error if no such
449   /// variable has been defined.
450   Expected<StringRef> getPatternVarValue(StringRef VarName);
451 
452   /// Defines string and numeric variables from definitions given on the
453   /// command line, passed as a vector of [#]VAR=VAL strings in
454   /// \p CmdlineDefines. \returns an error list containing diagnostics against
455   /// \p SM for all definition parsing failures, if any, or Success otherwise.
456   Error defineCmdlineVariables(ArrayRef<StringRef> CmdlineDefines,
457                                SourceMgr &SM);
458 
459   /// Create @LINE pseudo variable. Value is set when pattern are being
460   /// matched.
461   void createLineVariable();
462 
463   /// Undefines local variables (variables whose name does not start with a '$'
464   /// sign), i.e. removes them from GlobalVariableTable and from
465   /// GlobalNumericVariableTable and also clears the value of numeric
466   /// variables.
467   void clearLocalVars();
468 
469 private:
470   /// Makes a new numeric variable and registers it for destruction when the
471   /// context is destroyed.
472   template <class... Types> NumericVariable *makeNumericVariable(Types... args);
473 
474   /// Makes a new string substitution and registers it for destruction when the
475   /// context is destroyed.
476   Substitution *makeStringSubstitution(StringRef VarName, size_t InsertIdx);
477 
478   /// Makes a new numeric substitution and registers it for destruction when
479   /// the context is destroyed.
480   Substitution *makeNumericSubstitution(StringRef ExpressionStr,
481                                         std::unique_ptr<Expression> Expression,
482                                         size_t InsertIdx);
483 };
484 
485 /// Class to represent an error holding a diagnostic with location information
486 /// used when printing it.
487 class ErrorDiagnostic : public ErrorInfo<ErrorDiagnostic> {
488 private:
489   SMDiagnostic Diagnostic;
490   SMRange Range;
491 
492 public:
493   static char ID;
494 
ErrorDiagnostic(SMDiagnostic && Diag,SMRange Range)495   ErrorDiagnostic(SMDiagnostic &&Diag, SMRange Range)
496       : Diagnostic(Diag), Range(Range) {}
497 
convertToErrorCode()498   std::error_code convertToErrorCode() const override {
499     return inconvertibleErrorCode();
500   }
501 
502   /// Print diagnostic associated with this error when printing the error.
log(raw_ostream & OS)503   void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
504 
getMessage()505   StringRef getMessage() const { return Diagnostic.getMessage(); }
getRange()506   SMRange getRange() const { return Range; }
507 
508   static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg,
509                    SMRange Range = std::nullopt) {
510     return make_error<ErrorDiagnostic>(
511         SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg), Range);
512   }
513 
get(const SourceMgr & SM,StringRef Buffer,const Twine & ErrMsg)514   static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
515     SMLoc Start = SMLoc::getFromPointer(Buffer.data());
516     SMLoc End = SMLoc::getFromPointer(Buffer.data() + Buffer.size());
517     return get(SM, Start, ErrMsg, SMRange(Start, End));
518   }
519 };
520 
521 class NotFoundError : public ErrorInfo<NotFoundError> {
522 public:
523   static char ID;
524 
convertToErrorCode()525   std::error_code convertToErrorCode() const override {
526     return inconvertibleErrorCode();
527   }
528 
529   /// Print diagnostic associated with this error when printing the error.
log(raw_ostream & OS)530   void log(raw_ostream &OS) const override {
531     OS << "String not found in input";
532   }
533 };
534 
535 /// An error that has already been reported.
536 ///
537 /// This class is designed to support a function whose callers may need to know
538 /// whether the function encountered and reported an error but never need to
539 /// know the nature of that error.  For example, the function has a return type
540 /// of \c Error and always returns either \c ErrorReported or \c ErrorSuccess.
541 /// That interface is similar to that of a function returning bool to indicate
542 /// an error except, in the former case, (1) there is no confusion over polarity
543 /// and (2) the caller must either check the result or explicitly ignore it with
544 /// a call like \c consumeError.
545 class ErrorReported final : public ErrorInfo<ErrorReported> {
546 public:
547   static char ID;
548 
convertToErrorCode()549   std::error_code convertToErrorCode() const override {
550     return inconvertibleErrorCode();
551   }
552 
553   /// Print diagnostic associated with this error when printing the error.
log(raw_ostream & OS)554   void log(raw_ostream &OS) const override {
555     OS << "error previously reported";
556   }
557 
reportedOrSuccess(bool HasErrorReported)558   static inline Error reportedOrSuccess(bool HasErrorReported) {
559     if (HasErrorReported)
560       return make_error<ErrorReported>();
561     return Error::success();
562   }
563 };
564 
565 class Pattern {
566   SMLoc PatternLoc;
567 
568   /// A fixed string to match as the pattern or empty if this pattern requires
569   /// a regex match.
570   StringRef FixedStr;
571 
572   /// A regex string to match as the pattern or empty if this pattern requires
573   /// a fixed string to match.
574   std::string RegExStr;
575 
576   /// Entries in this vector represent a substitution of a string variable or
577   /// an expression in the RegExStr regex at match time. For example, in the
578   /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
579   /// RegExStr will contain "foobaz" and we'll get two entries in this vector
580   /// that tells us to insert the value of string variable "bar" at offset 3
581   /// and the value of expression "N+1" at offset 6.
582   std::vector<Substitution *> Substitutions;
583 
584   /// Maps names of string variables defined in a pattern to the number of
585   /// their parenthesis group in RegExStr capturing their last definition.
586   ///
587   /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
588   /// RegExStr will be "foo(.*)baz(\1<quux value>(.*))" where <quux value> is
589   /// the value captured for QUUX on the earlier line where it was defined, and
590   /// VariableDefs will map "bar" to the third parenthesis group which captures
591   /// the second definition of "bar".
592   ///
593   /// Note: uses std::map rather than StringMap to be able to get the key when
594   /// iterating over values.
595   std::map<StringRef, unsigned> VariableDefs;
596 
597   /// Structure representing the definition of a numeric variable in a pattern.
598   /// It holds the pointer to the class instance holding the value and matching
599   /// format of the numeric variable whose value is being defined and the
600   /// number of the parenthesis group in RegExStr to capture that value.
601   struct NumericVariableMatch {
602     /// Pointer to class instance holding the value and matching format of the
603     /// numeric variable being defined.
604     NumericVariable *DefinedNumericVariable;
605 
606     /// Number of the parenthesis group in RegExStr that captures the value of
607     /// this numeric variable definition.
608     unsigned CaptureParenGroup;
609   };
610 
611   /// Holds the number of the parenthesis group in RegExStr and pointer to the
612   /// corresponding NumericVariable class instance of all numeric variable
613   /// definitions. Used to set the matched value of all those variables.
614   StringMap<NumericVariableMatch> NumericVariableDefs;
615 
616   /// Pointer to a class instance holding the global state shared by all
617   /// patterns:
618   /// - separate tables with the values of live string and numeric variables
619   ///   respectively at the start of any given CHECK line;
620   /// - table holding whether a string variable has been defined at any given
621   ///   point during the parsing phase.
622   FileCheckPatternContext *Context;
623 
624   Check::FileCheckType CheckTy;
625 
626   /// Line number for this CHECK pattern or std::nullopt if it is an implicit
627   /// pattern. Used to determine whether a variable definition is made on an
628   /// earlier line to the one with this CHECK.
629   std::optional<size_t> LineNumber;
630 
631   /// Ignore case while matching if set to true.
632   bool IgnoreCase = false;
633 
634 public:
635   Pattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
636           std::optional<size_t> Line = std::nullopt)
Context(Context)637       : Context(Context), CheckTy(Ty), LineNumber(Line) {}
638 
639   /// \returns the location in source code.
getLoc()640   SMLoc getLoc() const { return PatternLoc; }
641 
642   /// \returns the pointer to the global state for all patterns in this
643   /// FileCheck instance.
getContext()644   FileCheckPatternContext *getContext() const { return Context; }
645 
646   /// \returns whether \p C is a valid first character for a variable name.
647   static bool isValidVarNameStart(char C);
648 
649   /// Parsing information about a variable.
650   struct VariableProperties {
651     StringRef Name;
652     bool IsPseudo;
653   };
654 
  /// Parses the string at the start of \p Str for a variable name. \returns
  /// a VariableProperties structure holding the variable name and whether it
  /// is the name of a pseudo variable, or an error holding a diagnostic
  /// against \p SM if parsing fails. If parsing was successful, also removes
  /// the variable name from the start of \p Str.
660   static Expected<VariableProperties> parseVariable(StringRef &Str,
661                                                     const SourceMgr &SM);
  /// Parses \p Expr for a numeric substitution block at line \p LineNumber,
  /// or before input is parsed if \p LineNumber is std::nullopt. Parameter
  /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
  /// expression and \p Context points to the class instance holding the live
  /// string and numeric variables. \returns a pointer to the class instance
  /// representing the expression whose value must be substituted, or an error
  /// holding a diagnostic against \p SM if parsing fails. If substitution was
  /// successful, sets \p DefinedNumericVariable to point to the class
  /// representing the numeric variable defined in this numeric substitution
  /// block, or std::nullopt if this block does not define any variable.
672   static Expected<std::unique_ptr<Expression>> parseNumericSubstitutionBlock(
673       StringRef Expr, std::optional<NumericVariable *> &DefinedNumericVariable,
674       bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
675       FileCheckPatternContext *Context, const SourceMgr &SM);
676   /// Parses the pattern in \p PatternStr and initializes this Pattern instance
677   /// accordingly.
678   ///
679   /// \p Prefix provides which prefix is being matched, \p Req describes the
680   /// global options that influence the parsing such as whitespace
681   /// canonicalization, \p SM provides the SourceMgr used for error reports.
682   /// \returns true in case of an error, false otherwise.
683   bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
684                     const FileCheckRequest &Req);
685   struct Match {
686     size_t Pos;
687     size_t Len;
688   };
689   struct MatchResult {
690     std::optional<Match> TheMatch;
691     Error TheError;
MatchResultMatchResult692     MatchResult(size_t MatchPos, size_t MatchLen, Error E)
693         : TheMatch(Match{MatchPos, MatchLen}), TheError(std::move(E)) {}
MatchResultMatchResult694     MatchResult(Match M, Error E) : TheMatch(M), TheError(std::move(E)) {}
MatchResultMatchResult695     MatchResult(Error E) : TheError(std::move(E)) {}
696   };
  /// Matches the pattern string against the input buffer \p Buffer.
  ///
  /// \returns a MatchResult that is either (1) an error resulting in no match
  /// (no TheMatch) or (2) a match (TheMatch set), possibly accompanied by an
  /// error encountered while processing the match.
  ///
  /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
  /// instance provides the current values of FileCheck string variables and is
  /// updated if this match defines new values. Likewise, the
  /// GlobalNumericVariableTable StringMap in the same class provides the
  /// current values of FileCheck numeric variables and is updated if this
  /// match defines new numeric values.
  MatchResult match(StringRef Buffer, const SourceMgr &SM) const;
  /// Prints the value of successful substitutions.
  ///
  /// NOTE(review): the roles of \p Buffer, \p MatchRange, \p MatchTy and
  /// \p Diags are not documented here; presumably diagnostics are reported
  /// against \p SM and also recorded in \p Diags -- confirm against the
  /// definition.
  void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
                          SMRange MatchRange, FileCheckDiag::MatchType MatchTy,
                          std::vector<FileCheckDiag> *Diags) const;
  /// NOTE(review): undocumented in this header. Presumably reports the
  /// closest approximate match of this pattern within \p Buffer (see
  /// computeMatchDistance()) to help diagnose a failed match, recording
  /// diagnostics in \p Diags -- confirm against the definition.
  void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
                       std::vector<FileCheckDiag> *Diags) const;
715 
hasVariable()716   bool hasVariable() const {
717     return !(Substitutions.empty() && VariableDefs.empty());
718   }
  /// NOTE(review): undocumented in this header. Presumably prints the
  /// variables defined by this pattern for a match of kind \p MatchTy,
  /// recording diagnostics in \p Diags -- confirm against the definition.
  void printVariableDefs(const SourceMgr &SM, FileCheckDiag::MatchType MatchTy,
                         std::vector<FileCheckDiag> *Diags) const;
721 
  /// \returns the check type (CheckTy) of this pattern.
  Check::FileCheckType getCheckTy() const { return CheckTy; }
723 
  /// \returns the count reported by this pattern's check type.
  int getCount() const { return CheckTy.getCount(); }
725 
726 private:
  /// NOTE(review): undocumented in this header. Presumably appends the regex
  /// \p RS to this pattern's regex and updates the parenthesis counter
  /// \p CurParen (taken by non-const reference). By analogy with
  /// parsePattern(), a true return likely signals an error reported via
  /// \p SM -- confirm against the definition.
  bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
  /// NOTE(review): undocumented in this header. Presumably appends a
  /// back-reference to capture group number \p BackrefNum to this pattern's
  /// regex -- confirm against the definition.
  void AddBackrefToRegEx(unsigned BackrefNum);
  /// Computes an arbitrary estimate for the quality of matching this pattern
  /// at the start of \p Buffer.
  ///
  /// \returns the estimated distance; a distance of zero should correspond to
  /// a perfect match.
  unsigned computeMatchDistance(StringRef Buffer) const;
  /// Finds the closing sequence of a regex variable usage or definition.
  ///
  /// \p Str has to point to the beginning of the definition (right after the
  /// opening sequence). \p SM holds the SourceMgr used for error reporting.
  /// \returns the offset of the closing sequence within \p Str, or npos if it
  /// was not found.
  static size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
740 
  /// Parses \p Expr for the name of a numeric variable to be defined at line
  /// \p LineNumber, or before input is parsed if \p LineNumber is
  /// std::nullopt. \returns a pointer to the class instance representing that
  /// variable, creating it if needed, or an error holding a diagnostic against
  /// \p SM should defining such a variable be invalid.
  ///
  /// NOTE(review): \p Expr is taken by non-const reference, so it is
  /// presumably advanced past the parsed name; \p Context presumably holds the
  /// live string and numeric variables as for the sibling parsers; and
  /// \p ImplicitFormat is presumably the format associated with the defined
  /// variable -- confirm all three against the definition.
  static Expected<NumericVariable *> parseNumericVariableDefinition(
      StringRef &Expr, FileCheckPatternContext *Context,
      std::optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
      const SourceMgr &SM);
  /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use
  /// at line \p LineNumber, or before input is parsed if \p LineNumber is
  /// std::nullopt. Parameter \p Context points to the class instance holding
  /// the live string and numeric variables. \returns the pointer to the class
  /// instance representing that variable use if successful, or an error
  /// holding a diagnostic against \p SM otherwise.
  static Expected<std::unique_ptr<NumericVariableUse>> parseNumericVariableUse(
      StringRef Name, bool IsPseudo, std::optional<size_t> LineNumber,
      FileCheckPatternContext *Context, const SourceMgr &SM);
  /// Selects which kinds of operand parseNumericOperand() accepts (its \p AO
  /// parameter).
  enum class AllowedOperand { LineVar, LegacyLiteral, Any };
  /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or
  /// before input is parsed if \p LineNumber is std::nullopt. Accepts literal
  /// values, numeric variables and function calls, depending on the value of
  /// \p AO. \p Context points to the class instance holding the live string
  /// and numeric variables. \returns the class representing that operand in
  /// the AST of the expression or an error holding a diagnostic against \p SM
  /// otherwise. If \p Expr starts with a "(" this function will attempt to
  /// parse a parenthesized expression.
  ///
  /// NOTE(review): this comment previously described a parameter named
  /// MaybeInvalidConstraint ("indicates whether the text being parsed could
  /// be an invalid constraint"), but the declaration below names its bool
  /// parameter \p ConstraintParsed. Confirm against the definition which name
  /// and meaning are current before relying on either.
  static Expected<std::unique_ptr<ExpressionAST>>
  parseNumericOperand(StringRef &Expr, AllowedOperand AO, bool ConstraintParsed,
                      std::optional<size_t> LineNumber,
                      FileCheckPatternContext *Context, const SourceMgr &SM);
  /// Parses and updates \p RemainingExpr for a binary operation at line
  /// \p LineNumber, or before input is parsed if \p LineNumber is
  /// std::nullopt. The left operand of this binary operation is given in
  /// \p LeftOp (passed as a std::unique_ptr by value, so ownership is handed
  /// to this function) and \p Expr holds the string for the full expression,
  /// including the left operand. Parameter \p IsLegacyLineExpr indicates
  /// whether we are parsing a legacy @LINE expression. Parameter \p Context
  /// points to the class instance holding the live string and numeric
  /// variables. \returns the class representing the binary operation in the
  /// AST of the expression, or an error holding a diagnostic against \p SM
  /// otherwise.
  static Expected<std::unique_ptr<ExpressionAST>>
  parseBinop(StringRef Expr, StringRef &RemainingExpr,
             std::unique_ptr<ExpressionAST> LeftOp, bool IsLegacyLineExpr,
             std::optional<size_t> LineNumber, FileCheckPatternContext *Context,
             const SourceMgr &SM);
787 
  /// Parses a parenthesized expression inside \p Expr at line \p LineNumber, or
  /// before input is parsed if \p LineNumber is std::nullopt. \p Expr must
  /// start with a '('. Accepts both literal values and numeric variables.
  /// Parameter \p Context points to the class instance holding the live string
  /// and numeric variables. \returns the class representing that operand in
  /// the AST of the expression or an error holding a diagnostic against \p SM
  /// otherwise.
  static Expected<std::unique_ptr<ExpressionAST>>
  parseParenExpr(StringRef &Expr, std::optional<size_t> LineNumber,
                 FileCheckPatternContext *Context, const SourceMgr &SM);
797 
  /// Parses \p Expr for an argument list belonging to a call to function \p
  /// FuncName at line \p LineNumber, or before input is parsed if \p LineNumber
  /// is std::nullopt. Parameter \p Context points to the class instance
  /// holding the live string and numeric variables. \returns the class
  /// representing that call in the AST of the expression or an error holding a
  /// diagnostic against \p SM otherwise.
  static Expected<std::unique_ptr<ExpressionAST>>
  parseCallExpr(StringRef &Expr, StringRef FuncName,
                std::optional<size_t> LineNumber,
                FileCheckPatternContext *Context, const SourceMgr &SM);
809 };
810 
811 //===----------------------------------------------------------------------===//
812 // Check Strings.
813 //===----------------------------------------------------------------------===//
814 
/// A check that we found in the input file.
struct FileCheckString {
  /// The pattern to match.
  Pattern Pat;

  /// Which prefix name this check matched.
  StringRef Prefix;

  /// The location in the match file where the check string was specified.
  SMLoc Loc;

  /// Holds the information about the DAG/NOT strings in the program, which are
  /// not explicitly stored otherwise. This allows for better and more accurate
  /// diagnostic messages.
  struct DagNotPrefixInfo {
    /// The pattern of the DAG/NOT string.
    Pattern DagNotPat;
    /// The prefix associated with the DAG/NOT string.
    StringRef DagNotPrefix;

    /// Stores a copy of the pattern \p P together with the prefix \p S.
    DagNotPrefixInfo(const Pattern &P, StringRef S)
        : DagNotPat(P), DagNotPrefix(S) {}
  };

  /// Holds the DAG/NOT strings occurring in the input file.
  std::vector<DagNotPrefixInfo> DagNotStrings;

  /// Builds a check string from a copy of the pattern \p P, the prefix \p S
  /// and the location \p L. DagNotStrings starts out empty.
  FileCheckString(const Pattern &P, StringRef S, SMLoc L)
      : Pat(P), Prefix(S), Loc(L) {}

  /// Matches check string and its "not strings" and/or "dag strings".
  ///
  /// NOTE(review): \p MatchLen is an output parameter (non-const reference);
  /// presumably it receives the length of the match and the return value is
  /// the match position within \p Buffer -- confirm against the definition.
  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
               size_t &MatchLen, FileCheckRequest &Req,
               std::vector<FileCheckDiag> *Diags) const;

  /// Verifies that there is a single line in the given \p Buffer. Errors are
  /// reported against \p SM.
  /// NOTE(review): the meaning of the bool result is not documented here;
  /// confirm whether true denotes failure.
  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
  /// Verifies that there is no newline in the given \p Buffer. Errors are
  /// reported against \p SM.
  /// NOTE(review): the meaning of the bool result is not documented here;
  /// confirm whether true denotes failure.
  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
  /// Verifies that none of the strings in \p NotStrings are found in the given
  /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in
  /// \p Diags according to the verbosity level set in \p Req.
  /// NOTE(review): the meaning of the bool result is not documented here;
  /// confirm whether true denotes failure.
  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
                const std::vector<const DagNotPrefixInfo *> &NotStrings,
                const FileCheckRequest &Req,
                std::vector<FileCheckDiag> *Diags) const;
  /// Matches "dag strings" and their mixed "not strings".
  ///
  /// NOTE(review): \p NotStrings is taken by non-const reference, so it is
  /// presumably updated with the "not strings" encountered while matching;
  /// the meaning of the returned size_t is not documented here -- confirm
  /// both against the definition.
  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
                  std::vector<const DagNotPrefixInfo *> &NotStrings,
                  const FileCheckRequest &Req,
                  std::vector<FileCheckDiag> *Diags) const;
};
867 
868 } // namespace llvm
869 
870 #endif
871