1 //===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the private interfaces of FileCheck. Its purpose is to
10 // allow unit testing of FileCheck and to separate the interface from the
11 // implementation. It is only meant to be used by FileCheck.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_LIB_FILECHECK_FILECHECKIMPL_H
16 #define LLVM_LIB_FILECHECK_FILECHECKIMPL_H
17 
18 #include "llvm/ADT/StringMap.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/FileCheck/FileCheck.h"
21 #include "llvm/Support/Error.h"
22 #include "llvm/Support/SourceMgr.h"
23 #include <map>
24 #include <optional>
25 #include <string>
26 #include <vector>
27 
28 namespace llvm {
29 
30 //===----------------------------------------------------------------------===//
31 // Numeric substitution handling code.
32 //===----------------------------------------------------------------------===//
33 
34 class ExpressionValue;
35 
36 /// Type representing the format an expression value should be textualized into
37 /// for matching. Used to represent both explicit format specifiers as well as
38 /// implicit format from using numeric variables.
39 struct ExpressionFormat {
40   enum class Kind {
41     /// Denote absence of format. Used for implicit format of literals and
42     /// empty expressions.
43     NoFormat,
44     /// Value is an unsigned integer and should be printed as a decimal number.
45     Unsigned,
46     /// Value is a signed integer and should be printed as a decimal number.
47     Signed,
48     /// Value should be printed as an uppercase hex number.
49     HexUpper,
50     /// Value should be printed as a lowercase hex number.
51     HexLower
52   };
53 
54 private:
55   Kind Value;
56   unsigned Precision = 0;
57   /// printf-like "alternate form" selected.
58   bool AlternateForm = false;
59 
60 public:
61   /// Evaluates a format to true if it can be used in a match.
62   explicit operator bool() const { return Value != Kind::NoFormat; }
63 
64   /// Define format equality: formats are equal if neither is NoFormat and
65   /// their kinds and precision are the same.
66   bool operator==(const ExpressionFormat &Other) const {
67     return Value != Kind::NoFormat && Value == Other.Value &&
68            Precision == Other.Precision && AlternateForm == Other.AlternateForm;
69   }
70 
71   bool operator!=(const ExpressionFormat &Other) const {
72     return !(*this == Other);
73   }
74 
75   bool operator==(Kind OtherValue) const { return Value == OtherValue; }
76 
77   bool operator!=(Kind OtherValue) const { return !(*this == OtherValue); }
78 
79   /// \returns the format specifier corresponding to this format as a string.
80   StringRef toString() const;
81 
82   ExpressionFormat() : Value(Kind::NoFormat){};
83   explicit ExpressionFormat(Kind Value) : Value(Value), Precision(0){};
84   explicit ExpressionFormat(Kind Value, unsigned Precision)
85       : Value(Value), Precision(Precision){};
86   explicit ExpressionFormat(Kind Value, unsigned Precision, bool AlternateForm)
87       : Value(Value), Precision(Precision), AlternateForm(AlternateForm){};
88 
89   /// \returns a wildcard regular expression string that matches any value in
90   /// the format represented by this instance and no other value, or an error
91   /// if the format is NoFormat.
92   Expected<std::string> getWildcardRegex() const;
93 
94   /// \returns the string representation of \p Value in the format represented
95   /// by this instance, or an error if conversion to this format failed or the
96   /// format is NoFormat.
97   Expected<std::string> getMatchingString(ExpressionValue Value) const;
98 
99   /// \returns the value corresponding to string representation \p StrVal
100   /// according to the matching format represented by this instance or an error
101   /// with diagnostic against \p SM if \p StrVal does not correspond to a valid
102   /// and representable value.
103   Expected<ExpressionValue> valueFromStringRepr(StringRef StrVal,
104                                                 const SourceMgr &SM) const;
105 };
106 
107 /// Class to represent an overflow error that might result when manipulating a
108 /// value.
109 class OverflowError : public ErrorInfo<OverflowError> {
110 public:
111   static char ID;
112 
113   std::error_code convertToErrorCode() const override {
114     return std::make_error_code(std::errc::value_too_large);
115   }
116 
117   void log(raw_ostream &OS) const override { OS << "overflow error"; }
118 };
119 
120 /// Class representing a numeric value.
121 class ExpressionValue {
122 private:
123   uint64_t Value;
124   bool Negative;
125 
126 public:
127   template <class T>
128   explicit ExpressionValue(T Val) : Value(Val), Negative(Val < 0) {}
129 
130   bool operator==(const ExpressionValue &Other) const {
131     return Value == Other.Value && isNegative() == Other.isNegative();
132   }
133 
134   bool operator!=(const ExpressionValue &Other) const {
135     return !(*this == Other);
136   }
137 
138   /// Returns true if value is signed and negative, false otherwise.
139   bool isNegative() const {
140     assert((Value != 0 || !Negative) && "Unexpected negative zero!");
141     return Negative;
142   }
143 
144   /// \returns the value as a signed integer or an error if the value is out of
145   /// range.
146   Expected<int64_t> getSignedValue() const;
147 
148   /// \returns the value as an unsigned integer or an error if the value is out
149   /// of range.
150   Expected<uint64_t> getUnsignedValue() const;
151 
152   /// \returns an unsigned ExpressionValue instance whose value is the absolute
153   /// value to this object's value.
154   ExpressionValue getAbsolute() const;
155 };
156 
/// Operations on pairs of ExpressionValue operands. Each one performs its
/// operation and \returns its result or an error in case of failure, such as
/// if an overflow occurs.
///
/// All six declarations share the parameter and return types of the
/// binop_eval_t function pointer alias declared later in this header.
/// NOTE(review): max and min presumably yield the larger and the smaller
/// operand respectively; their definitions are not in this header -- confirm.
Expected<ExpressionValue> operator+(const ExpressionValue &Lhs,
                                    const ExpressionValue &Rhs);
Expected<ExpressionValue> operator-(const ExpressionValue &Lhs,
                                    const ExpressionValue &Rhs);
Expected<ExpressionValue> operator*(const ExpressionValue &Lhs,
                                    const ExpressionValue &Rhs);
Expected<ExpressionValue> operator/(const ExpressionValue &Lhs,
                                    const ExpressionValue &Rhs);
Expected<ExpressionValue> max(const ExpressionValue &Lhs,
                              const ExpressionValue &Rhs);
Expected<ExpressionValue> min(const ExpressionValue &Lhs,
                              const ExpressionValue &Rhs);
171 
172 /// Base class representing the AST of a given expression.
173 class ExpressionAST {
174 private:
175   StringRef ExpressionStr;
176 
177 public:
178   ExpressionAST(StringRef ExpressionStr) : ExpressionStr(ExpressionStr) {}
179 
180   virtual ~ExpressionAST() = default;
181 
182   StringRef getExpressionStr() const { return ExpressionStr; }
183 
184   /// Evaluates and \returns the value of the expression represented by this
185   /// AST or an error if evaluation fails.
186   virtual Expected<ExpressionValue> eval() const = 0;
187 
188   /// \returns either the implicit format of this AST, a diagnostic against
189   /// \p SM if implicit formats of the AST's components conflict, or NoFormat
190   /// if the AST has no implicit format (e.g. AST is made up of a single
191   /// literal).
192   virtual Expected<ExpressionFormat>
193   getImplicitFormat(const SourceMgr &SM) const {
194     return ExpressionFormat();
195   }
196 };
197 
198 /// Class representing an unsigned literal in the AST of an expression.
199 class ExpressionLiteral : public ExpressionAST {
200 private:
201   /// Actual value of the literal.
202   ExpressionValue Value;
203 
204 public:
205   template <class T>
206   explicit ExpressionLiteral(StringRef ExpressionStr, T Val)
207       : ExpressionAST(ExpressionStr), Value(Val) {}
208 
209   /// \returns the literal's value.
210   Expected<ExpressionValue> eval() const override { return Value; }
211 };
212 
213 /// Class to represent an undefined variable error, which quotes that
214 /// variable's name when printed.
215 class UndefVarError : public ErrorInfo<UndefVarError> {
216 private:
217   StringRef VarName;
218 
219 public:
220   static char ID;
221 
222   UndefVarError(StringRef VarName) : VarName(VarName) {}
223 
224   StringRef getVarName() const { return VarName; }
225 
226   std::error_code convertToErrorCode() const override {
227     return inconvertibleErrorCode();
228   }
229 
230   /// Print name of variable associated with this error.
231   void log(raw_ostream &OS) const override {
232     OS << "undefined variable: " << VarName;
233   }
234 };
235 
236 /// Class representing an expression and its matching format.
237 class Expression {
238 private:
239   /// Pointer to AST of the expression.
240   std::unique_ptr<ExpressionAST> AST;
241 
242   /// Format to use (e.g. hex upper case letters) when matching the value.
243   ExpressionFormat Format;
244 
245 public:
246   /// Generic constructor for an expression represented by the given \p AST and
247   /// whose matching format is \p Format.
248   Expression(std::unique_ptr<ExpressionAST> AST, ExpressionFormat Format)
249       : AST(std::move(AST)), Format(Format) {}
250 
251   /// \returns pointer to AST of the expression. Pointer is guaranteed to be
252   /// valid as long as this object is.
253   ExpressionAST *getAST() const { return AST.get(); }
254 
255   ExpressionFormat getFormat() const { return Format; }
256 };
257 
258 /// Class representing a numeric variable and its associated current value.
259 class NumericVariable {
260 private:
261   /// Name of the numeric variable.
262   StringRef Name;
263 
264   /// Format to use for expressions using this variable without an explicit
265   /// format.
266   ExpressionFormat ImplicitFormat;
267 
268   /// Value of numeric variable, if defined, or std::nullopt otherwise.
269   std::optional<ExpressionValue> Value;
270 
271   /// The input buffer's string from which Value was parsed, or std::nullopt.
272   /// See comments on getStringValue for a discussion of the None case.
273   std::optional<StringRef> StrValue;
274 
275   /// Line number where this variable is defined, or std::nullopt if defined
276   /// before input is parsed. Used to determine whether a variable is defined on
277   /// the same line as a given use.
278   std::optional<size_t> DefLineNumber;
279 
280 public:
281   /// Constructor for a variable \p Name with implicit format \p ImplicitFormat
282   /// defined at line \p DefLineNumber or defined before input is parsed if
283   /// \p DefLineNumber is None.
284   explicit NumericVariable(StringRef Name, ExpressionFormat ImplicitFormat,
285                            std::optional<size_t> DefLineNumber = std::nullopt)
286       : Name(Name), ImplicitFormat(ImplicitFormat),
287         DefLineNumber(DefLineNumber) {}
288 
289   /// \returns name of this numeric variable.
290   StringRef getName() const { return Name; }
291 
292   /// \returns implicit format of this numeric variable.
293   ExpressionFormat getImplicitFormat() const { return ImplicitFormat; }
294 
295   /// \returns this variable's value.
296   std::optional<ExpressionValue> getValue() const { return Value; }
297 
298   /// \returns the input buffer's string from which this variable's value was
299   /// parsed, or std::nullopt if the value is not yet defined or was not parsed
300   /// from the input buffer.  For example, the value of @LINE is not parsed from
301   /// the input buffer, and some numeric variables are parsed from the command
302   /// line instead.
303   std::optional<StringRef> getStringValue() const { return StrValue; }
304 
305   /// Sets value of this numeric variable to \p NewValue, and sets the input
306   /// buffer string from which it was parsed to \p NewStrValue.  See comments on
307   /// getStringValue for a discussion of when the latter can be None.
308   void setValue(ExpressionValue NewValue,
309                 std::optional<StringRef> NewStrValue = std::nullopt) {
310     Value = NewValue;
311     StrValue = NewStrValue;
312   }
313 
314   /// Clears value of this numeric variable, regardless of whether it is
315   /// currently defined or not.
316   void clearValue() {
317     Value = std::nullopt;
318     StrValue = std::nullopt;
319   }
320 
321   /// \returns the line number where this variable is defined, if any, or
322   /// std::nullopt if defined before input is parsed.
323   std::optional<size_t> getDefLineNumber() const { return DefLineNumber; }
324 };
325 
326 /// Class representing the use of a numeric variable in the AST of an
327 /// expression.
328 class NumericVariableUse : public ExpressionAST {
329 private:
330   /// Pointer to the class instance for the variable this use is about.
331   NumericVariable *Variable;
332 
333 public:
334   NumericVariableUse(StringRef Name, NumericVariable *Variable)
335       : ExpressionAST(Name), Variable(Variable) {}
336   /// \returns the value of the variable referenced by this instance.
337   Expected<ExpressionValue> eval() const override;
338 
339   /// \returns implicit format of this numeric variable.
340   Expected<ExpressionFormat>
341   getImplicitFormat(const SourceMgr &SM) const override {
342     return Variable->getImplicitFormat();
343   }
344 };
345 
/// Type of functions evaluating a given binary operation: a pointer to a
/// function taking two ExpressionValue operands by const reference and
/// returning the resulting value or an error. The ExpressionValue operators
/// and the max/min functions declared earlier in this header have this
/// signature.
using binop_eval_t = Expected<ExpressionValue> (*)(const ExpressionValue &,
                                                   const ExpressionValue &);
349 
350 /// Class representing a single binary operation in the AST of an expression.
351 class BinaryOperation : public ExpressionAST {
352 private:
353   /// Left operand.
354   std::unique_ptr<ExpressionAST> LeftOperand;
355 
356   /// Right operand.
357   std::unique_ptr<ExpressionAST> RightOperand;
358 
359   /// Pointer to function that can evaluate this binary operation.
360   binop_eval_t EvalBinop;
361 
362 public:
363   BinaryOperation(StringRef ExpressionStr, binop_eval_t EvalBinop,
364                   std::unique_ptr<ExpressionAST> LeftOp,
365                   std::unique_ptr<ExpressionAST> RightOp)
366       : ExpressionAST(ExpressionStr), EvalBinop(EvalBinop) {
367     LeftOperand = std::move(LeftOp);
368     RightOperand = std::move(RightOp);
369   }
370 
371   /// Evaluates the value of the binary operation represented by this AST,
372   /// using EvalBinop on the result of recursively evaluating the operands.
373   /// \returns the expression value or an error if an undefined numeric
374   /// variable is used in one of the operands.
375   Expected<ExpressionValue> eval() const override;
376 
377   /// \returns the implicit format of this AST, if any, a diagnostic against
378   /// \p SM if the implicit formats of the AST's components conflict, or no
379   /// format if the AST has no implicit format (e.g. AST is made of a single
380   /// literal).
381   Expected<ExpressionFormat>
382   getImplicitFormat(const SourceMgr &SM) const override;
383 };
384 
385 class FileCheckPatternContext;
386 
387 /// Class representing a substitution to perform in the RegExStr string.
388 class Substitution {
389 protected:
390   /// Pointer to a class instance holding, among other things, the table with
391   /// the values of live string variables at the start of any given CHECK line.
392   /// Used for substituting string variables with the text they were defined
393   /// as. Expressions are linked to the numeric variables they use at
394   /// parse time and directly access the value of the numeric variable to
395   /// evaluate their value.
396   FileCheckPatternContext *Context;
397 
398   /// The string that needs to be substituted for something else. For a
399   /// string variable this is its name, otherwise this is the whole expression.
400   StringRef FromStr;
401 
402   // Index in RegExStr of where to do the substitution.
403   size_t InsertIdx;
404 
405 public:
406   Substitution(FileCheckPatternContext *Context, StringRef VarName,
407                size_t InsertIdx)
408       : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
409 
410   virtual ~Substitution() = default;
411 
412   /// \returns the string to be substituted for something else.
413   StringRef getFromString() const { return FromStr; }
414 
415   /// \returns the index where the substitution is to be performed in RegExStr.
416   size_t getIndex() const { return InsertIdx; }
417 
418   /// \returns a string containing the result of the substitution represented
419   /// by this class instance or an error if substitution failed.
420   virtual Expected<std::string> getResult() const = 0;
421 };
422 
423 class StringSubstitution : public Substitution {
424 public:
425   StringSubstitution(FileCheckPatternContext *Context, StringRef VarName,
426                      size_t InsertIdx)
427       : Substitution(Context, VarName, InsertIdx) {}
428 
429   /// \returns the text that the string variable in this substitution matched
430   /// when defined, or an error if the variable is undefined.
431   Expected<std::string> getResult() const override;
432 };
433 
434 class NumericSubstitution : public Substitution {
435 private:
436   /// Pointer to the class representing the expression whose value is to be
437   /// substituted.
438   std::unique_ptr<Expression> ExpressionPointer;
439 
440 public:
441   NumericSubstitution(FileCheckPatternContext *Context, StringRef ExpressionStr,
442                       std::unique_ptr<Expression> ExpressionPointer,
443                       size_t InsertIdx)
444       : Substitution(Context, ExpressionStr, InsertIdx),
445         ExpressionPointer(std::move(ExpressionPointer)) {}
446 
447   /// \returns a string containing the result of evaluating the expression in
448   /// this substitution, or an error if evaluation failed.
449   Expected<std::string> getResult() const override;
450 };
451 
452 //===----------------------------------------------------------------------===//
453 // Pattern handling code.
454 //===----------------------------------------------------------------------===//
455 
/// Class holding the Pattern global state, shared by all patterns: tables
/// holding values of variables and whether they are defined or not at any
/// given time in the matching process.
class FileCheckPatternContext {
  // Grants Pattern access to the private members declared below.
  friend class Pattern;

private:
  /// When matching a given pattern, this holds the value of all the string
  /// variables defined in previous patterns. In a pattern, only the last
  /// definition for a given variable is recorded in this table.
  /// Back-references are used for uses after any other definition.
  StringMap<StringRef> GlobalVariableTable;

  /// Map of all string variables defined so far. Used at parse time to detect
  /// a name conflict between a numeric variable and a string variable when
  /// the former is defined on a later line than the latter.
  StringMap<bool> DefinedVariableTable;

  /// When matching a given pattern, this holds the pointers to the classes
  /// representing the numeric variables defined in previous patterns. When
  /// matching a pattern all definitions for that pattern are recorded in the
  /// NumericVariableDefs table in the Pattern instance of that pattern.
  StringMap<NumericVariable *> GlobalNumericVariableTable;

  /// Pointer to the class instance representing the @LINE pseudo variable for
  /// easily updating its value. Null until set.
  NumericVariable *LineVariable = nullptr;

  /// Vector holding pointers to all parsed numeric variables. Used to
  /// automatically free them once they are guaranteed to no longer be used.
  std::vector<std::unique_ptr<NumericVariable>> NumericVariables;

  /// Vector holding pointers to all parsed expressions. Used to automatically
  /// free the expressions once they are guaranteed to no longer be used.
  std::vector<std::unique_ptr<Expression>> Expressions;

  /// Vector holding pointers to all substitutions. Used to automatically free
  /// them once they are guaranteed to no longer be used.
  std::vector<std::unique_ptr<Substitution>> Substitutions;

public:
  /// \returns the value of string variable \p VarName or an error if no such
  /// variable has been defined.
  Expected<StringRef> getPatternVarValue(StringRef VarName);

  /// Defines string and numeric variables from definitions given on the
  /// command line, passed as a vector of [#]VAR=VAL strings in
  /// \p CmdlineDefines. \returns an error list containing diagnostics against
  /// \p SM for all definition parsing failures, if any, or Success otherwise.
  Error defineCmdlineVariables(ArrayRef<StringRef> CmdlineDefines,
                               SourceMgr &SM);

  /// Creates the @LINE pseudo variable. Its value is set when patterns are
  /// being matched.
  void createLineVariable();

  /// Undefines local variables (variables whose name does not start with a '$'
  /// sign), i.e. removes them from GlobalVariableTable and from
  /// GlobalNumericVariableTable and also clears the value of numeric
  /// variables.
  void clearLocalVars();

private:
  /// Makes a new numeric variable and registers it for destruction when the
  /// context is destroyed.
  /// NOTE(review): \p args are presumably forwarded to the NumericVariable
  /// constructor; the definition is not in this header -- confirm.
  template <class... Types> NumericVariable *makeNumericVariable(Types... args);

  /// Makes a new string substitution and registers it for destruction when the
  /// context is destroyed.
  Substitution *makeStringSubstitution(StringRef VarName, size_t InsertIdx);

  /// Makes a new numeric substitution and registers it for destruction when
  /// the context is destroyed.
  Substitution *makeNumericSubstitution(StringRef ExpressionStr,
                                        std::unique_ptr<Expression> Expression,
                                        size_t InsertIdx);
};
533 
534 /// Class to represent an error holding a diagnostic with location information
535 /// used when printing it.
536 class ErrorDiagnostic : public ErrorInfo<ErrorDiagnostic> {
537 private:
538   SMDiagnostic Diagnostic;
539   SMRange Range;
540 
541 public:
542   static char ID;
543 
544   ErrorDiagnostic(SMDiagnostic &&Diag, SMRange Range)
545       : Diagnostic(Diag), Range(Range) {}
546 
547   std::error_code convertToErrorCode() const override {
548     return inconvertibleErrorCode();
549   }
550 
551   /// Print diagnostic associated with this error when printing the error.
552   void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
553 
554   StringRef getMessage() const { return Diagnostic.getMessage(); }
555   SMRange getRange() const { return Range; }
556 
557   static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg,
558                    SMRange Range = std::nullopt) {
559     return make_error<ErrorDiagnostic>(
560         SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg), Range);
561   }
562 
563   static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
564     SMLoc Start = SMLoc::getFromPointer(Buffer.data());
565     SMLoc End = SMLoc::getFromPointer(Buffer.data() + Buffer.size());
566     return get(SM, Start, ErrMsg, SMRange(Start, End));
567   }
568 };
569 
570 class NotFoundError : public ErrorInfo<NotFoundError> {
571 public:
572   static char ID;
573 
574   std::error_code convertToErrorCode() const override {
575     return inconvertibleErrorCode();
576   }
577 
578   /// Print diagnostic associated with this error when printing the error.
579   void log(raw_ostream &OS) const override {
580     OS << "String not found in input";
581   }
582 };
583 
584 /// An error that has already been reported.
585 ///
586 /// This class is designed to support a function whose callers may need to know
587 /// whether the function encountered and reported an error but never need to
588 /// know the nature of that error.  For example, the function has a return type
589 /// of \c Error and always returns either \c ErrorReported or \c ErrorSuccess.
590 /// That interface is similar to that of a function returning bool to indicate
591 /// an error except, in the former case, (1) there is no confusion over polarity
592 /// and (2) the caller must either check the result or explicitly ignore it with
593 /// a call like \c consumeError.
594 class ErrorReported final : public ErrorInfo<ErrorReported> {
595 public:
596   static char ID;
597 
598   std::error_code convertToErrorCode() const override {
599     return inconvertibleErrorCode();
600   }
601 
602   /// Print diagnostic associated with this error when printing the error.
603   void log(raw_ostream &OS) const override {
604     OS << "error previously reported";
605   }
606 
607   static inline Error reportedOrSuccess(bool HasErrorReported) {
608     if (HasErrorReported)
609       return make_error<ErrorReported>();
610     return Error::success();
611   }
612 };
613 
614 class Pattern {
615   SMLoc PatternLoc;
616 
617   /// A fixed string to match as the pattern or empty if this pattern requires
618   /// a regex match.
619   StringRef FixedStr;
620 
621   /// A regex string to match as the pattern or empty if this pattern requires
622   /// a fixed string to match.
623   std::string RegExStr;
624 
625   /// Entries in this vector represent a substitution of a string variable or
626   /// an expression in the RegExStr regex at match time. For example, in the
627   /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
628   /// RegExStr will contain "foobaz" and we'll get two entries in this vector
629   /// that tells us to insert the value of string variable "bar" at offset 3
630   /// and the value of expression "N+1" at offset 6.
631   std::vector<Substitution *> Substitutions;
632 
633   /// Maps names of string variables defined in a pattern to the number of
634   /// their parenthesis group in RegExStr capturing their last definition.
635   ///
636   /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
637   /// RegExStr will be "foo(.*)baz(\1<quux value>(.*))" where <quux value> is
638   /// the value captured for QUUX on the earlier line where it was defined, and
639   /// VariableDefs will map "bar" to the third parenthesis group which captures
640   /// the second definition of "bar".
641   ///
642   /// Note: uses std::map rather than StringMap to be able to get the key when
643   /// iterating over values.
644   std::map<StringRef, unsigned> VariableDefs;
645 
646   /// Structure representing the definition of a numeric variable in a pattern.
647   /// It holds the pointer to the class instance holding the value and matching
648   /// format of the numeric variable whose value is being defined and the
649   /// number of the parenthesis group in RegExStr to capture that value.
650   struct NumericVariableMatch {
651     /// Pointer to class instance holding the value and matching format of the
652     /// numeric variable being defined.
653     NumericVariable *DefinedNumericVariable;
654 
655     /// Number of the parenthesis group in RegExStr that captures the value of
656     /// this numeric variable definition.
657     unsigned CaptureParenGroup;
658   };
659 
660   /// Holds the number of the parenthesis group in RegExStr and pointer to the
661   /// corresponding NumericVariable class instance of all numeric variable
662   /// definitions. Used to set the matched value of all those variables.
663   StringMap<NumericVariableMatch> NumericVariableDefs;
664 
665   /// Pointer to a class instance holding the global state shared by all
666   /// patterns:
667   /// - separate tables with the values of live string and numeric variables
668   ///   respectively at the start of any given CHECK line;
669   /// - table holding whether a string variable has been defined at any given
670   ///   point during the parsing phase.
671   FileCheckPatternContext *Context;
672 
673   Check::FileCheckType CheckTy;
674 
675   /// Line number for this CHECK pattern or std::nullopt if it is an implicit
676   /// pattern. Used to determine whether a variable definition is made on an
677   /// earlier line to the one with this CHECK.
678   std::optional<size_t> LineNumber;
679 
680   /// Ignore case while matching if set to true.
681   bool IgnoreCase = false;
682 
683 public:
684   Pattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
685           std::optional<size_t> Line = std::nullopt)
686       : Context(Context), CheckTy(Ty), LineNumber(Line) {}
687 
688   /// \returns the location in source code.
689   SMLoc getLoc() const { return PatternLoc; }
690 
691   /// \returns the pointer to the global state for all patterns in this
692   /// FileCheck instance.
693   FileCheckPatternContext *getContext() const { return Context; }
694 
695   /// \returns whether \p C is a valid first character for a variable name.
696   static bool isValidVarNameStart(char C);
697 
698   /// Parsing information about a variable.
699   struct VariableProperties {
700     StringRef Name;
701     bool IsPseudo;
702   };
703 
  /// Parses the string at the start of \p Str for a variable name. \returns
  /// a VariableProperties structure holding the variable name and whether it
  /// is the name of a pseudo variable, or an error holding a diagnostic
  /// against \p SM if parsing fails. If parsing was successful, also strips
  /// the variable name from the start of \p Str.
709   static Expected<VariableProperties> parseVariable(StringRef &Str,
710                                                     const SourceMgr &SM);
  /// Parses \p Expr for a numeric substitution block at line \p LineNumber,
  /// or before input is parsed if \p LineNumber is std::nullopt. Parameter
  /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
  /// expression and \p Context points to the class instance holding the live
  /// string and numeric variables. \returns a pointer to the class instance
  /// representing the expression whose value must be substituted, or an error
  /// holding a diagnostic against \p SM if parsing fails. If substitution was
  /// successful, sets \p DefinedNumericVariable to point to the class
  /// representing the numeric variable defined in this numeric substitution
  /// block, or std::nullopt if this block does not define any variable.
721   static Expected<std::unique_ptr<Expression>> parseNumericSubstitutionBlock(
722       StringRef Expr, std::optional<NumericVariable *> &DefinedNumericVariable,
723       bool IsLegacyLineExpr, std::optional<size_t> LineNumber,
724       FileCheckPatternContext *Context, const SourceMgr &SM);
725   /// Parses the pattern in \p PatternStr and initializes this Pattern instance
726   /// accordingly.
727   ///
728   /// \p Prefix provides which prefix is being matched, \p Req describes the
729   /// global options that influence the parsing such as whitespace
730   /// canonicalization, \p SM provides the SourceMgr used for error reports.
731   /// \returns true in case of an error, false otherwise.
732   bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
733                     const FileCheckRequest &Req);
734   struct Match {
735     size_t Pos;
736     size_t Len;
737   };
738   struct MatchResult {
739     std::optional<Match> TheMatch;
740     Error TheError;
741     MatchResult(size_t MatchPos, size_t MatchLen, Error E)
742         : TheMatch(Match{MatchPos, MatchLen}), TheError(std::move(E)) {}
743     MatchResult(Match M, Error E) : TheMatch(M), TheError(std::move(E)) {}
744     MatchResult(Error E) : TheError(std::move(E)) {}
745   };
  /// Matches the pattern string against the input buffer \p Buffer.
  ///
  /// \returns a MatchResult holding either (1) an error resulting in no match
  /// or (2) a match, possibly with an error encountered while processing the
  /// match.
  ///
  /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
  /// instance provides the current values of FileCheck string variables and is
  /// updated if this match defines new values. Likewise, the
  /// GlobalNumericVariableTable StringMap in the same class provides the
  /// current values of FileCheck numeric variables and is updated if this
  /// match defines new numeric values.
  MatchResult match(StringRef Buffer, const SourceMgr &SM) const;
  /// Prints the value of successful substitutions.
  ///
  /// NOTE(review): the roles of \p Buffer, \p MatchRange, \p MatchTy and
  /// \p Diags are not documented here. Presumably output is reported against
  /// \p SM and also recorded in \p Diags -- confirm against the
  /// implementation.
  void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
                          SMRange MatchRange, FileCheckDiag::MatchType MatchTy,
                          std::vector<FileCheckDiag> *Diags) const;
  /// NOTE(review): undocumented. Judging by its name, this presumably reports
  /// the closest approximate match of this pattern in \p Buffer, using \p SM
  /// for reporting and \p Diags for recording diagnostics (see
  /// computeMatchDistance()) -- confirm against the implementation.
  void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
                       std::vector<FileCheckDiag> *Diags) const;
764 
765   bool hasVariable() const {
766     return !(Substitutions.empty() && VariableDefs.empty());
767   }
  /// NOTE(review): undocumented. Judging by its name, this presumably prints
  /// the variable definitions made by this pattern (see VariableDefs, which
  /// hasVariable() consults), reporting against \p SM and recording
  /// diagnostics in \p Diags -- confirm against the implementation.
  void printVariableDefs(const SourceMgr &SM, FileCheckDiag::MatchType MatchTy,
                         std::vector<FileCheckDiag> *Diags) const;
770 
  /// \returns a copy of the check type stored in CheckTy.
  Check::FileCheckType getCheckTy() const { return CheckTy; }
772 
  /// \returns the count reported by this pattern's check type, i.e. the
  /// result of CheckTy.getCount().
  int getCount() const { return CheckTy.getCount(); }
774 
775 private:
  /// NOTE(review): undocumented. Presumably appends the regular expression
  /// \p RS to the regex built for this pattern, updating the parenthesis
  /// counter \p CurParen and reporting errors against \p SM. The meaning of
  /// the bool return value is not visible here -- confirm against the
  /// implementation.
  bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
  /// NOTE(review): undocumented. Presumably appends a back-reference to
  /// capture group number \p BackrefNum to the regex built for this pattern
  /// -- confirm against the implementation.
  void AddBackrefToRegEx(unsigned BackrefNum);
  /// Computes an arbitrary estimate for the quality of matching this pattern
  /// at the start of \p Buffer.
  ///
  /// \returns the estimated distance; a distance of zero should correspond to
  /// a perfect match.
  unsigned computeMatchDistance(StringRef Buffer) const;
  /// Finds the closing sequence of a regex variable usage or definition.
  ///
  /// \p Str has to point to the beginning of the definition (right after the
  /// opening sequence). \p SM holds the SourceMgr used for error reporting.
  /// \returns the offset of the closing sequence within \p Str, or npos if it
  /// was not found.
  static size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
789 
  /// Parses \p Expr for the name of a numeric variable to be defined at line
  /// \p LineNumber, or before input is parsed if \p LineNumber is
  /// std::nullopt. Parameter \p Context points to the class instance holding
  /// the live string and numeric variables.
  /// NOTE(review): \p ImplicitFormat is presumably the format associated with
  /// the variable being defined -- confirm against the implementation.
  /// \returns a pointer to the class instance representing that variable,
  /// creating it if needed, or an error holding a diagnostic against \p SM
  /// should defining such a variable be invalid.
  static Expected<NumericVariable *> parseNumericVariableDefinition(
      StringRef &Expr, FileCheckPatternContext *Context,
      std::optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
      const SourceMgr &SM);
  /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use
  /// at line \p LineNumber, or before input is parsed if \p LineNumber is
  /// std::nullopt. Parameter \p Context points to the class instance holding
  /// the live string and numeric variables. \returns a pointer to the class
  /// instance representing that variable use if successful, or an error
  /// holding a diagnostic against \p SM otherwise.
  static Expected<std::unique_ptr<NumericVariableUse>> parseNumericVariableUse(
      StringRef Name, bool IsPseudo, std::optional<size_t> LineNumber,
      FileCheckPatternContext *Context, const SourceMgr &SM);
  /// Selects which kinds of operand parseNumericOperand() accepts (see its
  /// \p AO parameter).
  /// NOTE(review): the individual enumerators are not documented. Judging by
  /// their names, LineVar presumably restricts operands to the @LINE pseudo
  /// variable, LegacyLiteral to literals as allowed in legacy @LINE
  /// expressions, and Any lifts the restriction -- confirm against the
  /// implementation.
  enum class AllowedOperand { LineVar, LegacyLiteral, Any };
  /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or
  /// before input is parsed if \p LineNumber is std::nullopt. Accepts literal
  /// values, numeric variables and function calls, depending on the value of
  /// \p AO. The boolean parameter indicates whether the text being parsed
  /// could be an invalid constraint.
  /// NOTE(review): that description reads like a parameter named
  /// MaybeInvalidConstraint, whereas this declaration names it
  /// \p ConstraintParsed -- confirm against the definition which name and
  /// meaning are intended.
  /// \p Context points to the class instance holding the live string and
  /// numeric variables. \returns the class representing that operand in the
  /// AST of the expression or an error holding a diagnostic against \p SM
  /// otherwise. If \p Expr starts with a "(" this function will attempt to
  /// parse a parenthesized expression.
  static Expected<std::unique_ptr<ExpressionAST>>
  parseNumericOperand(StringRef &Expr, AllowedOperand AO, bool ConstraintParsed,
                      std::optional<size_t> LineNumber,
                      FileCheckPatternContext *Context, const SourceMgr &SM);
  /// Parses and updates \p RemainingExpr for a binary operation at line
  /// \p LineNumber, or before input is parsed if \p LineNumber is
  /// std::nullopt. The left operand of this binary operation is given in
  /// \p LeftOp and \p Expr holds the string for the full expression, including
  /// the left operand. Parameter \p IsLegacyLineExpr indicates whether we are
  /// parsing a legacy @LINE expression. Parameter \p Context points to the
  /// class instance holding the live string and numeric variables. \returns
  /// the class representing the binary operation in the AST of the
  /// expression, or an error holding a diagnostic against \p SM otherwise.
  static Expected<std::unique_ptr<ExpressionAST>>
  parseBinop(StringRef Expr, StringRef &RemainingExpr,
             std::unique_ptr<ExpressionAST> LeftOp, bool IsLegacyLineExpr,
             std::optional<size_t> LineNumber, FileCheckPatternContext *Context,
             const SourceMgr &SM);
836 
  /// Parses a parenthesized expression inside \p Expr at line \p LineNumber,
  /// or before input is parsed if \p LineNumber is std::nullopt. \p Expr must
  /// start with a '('. Accepts both literal values and numeric variables.
  /// Parameter \p Context points to the class instance holding the live
  /// string and numeric variables. \returns the class representing that
  /// operand in the AST of the expression or an error holding a diagnostic
  /// against \p SM otherwise.
  static Expected<std::unique_ptr<ExpressionAST>>
  parseParenExpr(StringRef &Expr, std::optional<size_t> LineNumber,
                 FileCheckPatternContext *Context, const SourceMgr &SM);
846 
  /// Parses \p Expr for an argument list belonging to a call to function
  /// \p FuncName at line \p LineNumber, or before input is parsed if
  /// \p LineNumber is std::nullopt. Parameter \p Context points to the class
  /// instance holding the live string and numeric variables. \returns the
  /// class representing that call in the AST of the expression or an error
  /// holding a diagnostic against \p SM otherwise.
  static Expected<std::unique_ptr<ExpressionAST>>
  parseCallExpr(StringRef &Expr, StringRef FuncName,
                std::optional<size_t> LineNumber,
                FileCheckPatternContext *Context, const SourceMgr &SM);
858 };
859 
860 //===----------------------------------------------------------------------===//
861 // Check Strings.
862 //===----------------------------------------------------------------------===//
863 
/// A check that we found in the input file.
///
/// Bundles the pattern of one check with the prefix it was found under, the
/// location where it was specified, and the patterns kept in DagNotStrings.
struct FileCheckString {
  /// The pattern to match.
  Pattern Pat;

  /// Which prefix name this check matched.
  StringRef Prefix;

  /// The location in the match file where the check string was specified.
  SMLoc Loc;

  /// All of the strings that are disallowed from occurring between this match
  /// string and the previous one (or start of file).
  /// NOTE(review): given the member name and CheckDag() below, this
  /// presumably also holds the "dag strings" -- confirm.
  std::vector<Pattern> DagNotStrings;

  /// Creates a check string holding a copy of pattern \p P, found under
  /// prefix \p S and specified at location \p L. DagNotStrings starts out
  /// empty.
  FileCheckString(const Pattern &P, StringRef S, SMLoc L)
      : Pat(P), Prefix(S), Loc(L) {}

  /// Matches check string and its "not strings" and/or "dag strings".
  /// NOTE(review): presumably \returns the position of the match within
  /// \p Buffer and sets \p MatchLen to its length, with diagnostics recorded
  /// in \p Diags -- confirm against the implementation.
  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
               size_t &MatchLen, FileCheckRequest &Req,
               std::vector<FileCheckDiag> *Diags) const;

  /// Verifies that there is a single line in the given \p Buffer. Errors are
  /// reported against \p SM.
  /// NOTE(review): the meaning of the bool return value is not documented
  /// here -- confirm against the implementation.
  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
  /// Verifies that there is no newline in the given \p Buffer. Errors are
  /// reported against \p SM.
  /// NOTE(review): the meaning of the bool return value is not documented
  /// here -- confirm against the implementation.
  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
  /// Verifies that none of the strings in \p NotStrings are found in the given
  /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in
  /// \p Diags according to the verbosity level set in \p Req.
  /// NOTE(review): the meaning of the bool return value is not documented
  /// here -- confirm against the implementation.
  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
                const std::vector<const Pattern *> &NotStrings,
                const FileCheckRequest &Req,
                std::vector<FileCheckDiag> *Diags) const;
  /// Matches "dag strings" and their mixed "not strings".
  /// NOTE(review): \p NotStrings is taken by non-const reference, so it is
  /// presumably updated by this call; the meaning of the returned size_t is
  /// not documented here -- confirm both against the implementation.
  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
                  std::vector<const Pattern *> &NotStrings,
                  const FileCheckRequest &Req,
                  std::vector<FileCheckDiag> *Diags) const;
};
906 
907 } // namespace llvm
908 
909 #endif
910