1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// FileCheck does a line-by-line check of a file that validates whether it
10 // contains the expected content.  This is useful for regression tests etc.
11 //
12 // This file implements most of the API that will be used by the FileCheck utility
13 // as well as various unittests.
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Support/FileCheck.h"
17 #include "FileCheckImpl.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/Support/FormatVariadic.h"
21 #include <cstdint>
22 #include <list>
23 #include <tuple>
24 #include <utility>
25 
26 using namespace llvm;
27 
eval() const28 Expected<uint64_t> NumericVariableUse::eval() const {
29   Optional<uint64_t> Value = Variable->getValue();
30   if (Value)
31     return *Value;
32 
33   return make_error<UndefVarError>(Name);
34 }
35 
eval() const36 Expected<uint64_t> BinaryOperation::eval() const {
37   Expected<uint64_t> LeftOp = LeftOperand->eval();
38   Expected<uint64_t> RightOp = RightOperand->eval();
39 
40   // Bubble up any error (e.g. undefined variables) in the recursive
41   // evaluation.
42   if (!LeftOp || !RightOp) {
43     Error Err = Error::success();
44     if (!LeftOp)
45       Err = joinErrors(std::move(Err), LeftOp.takeError());
46     if (!RightOp)
47       Err = joinErrors(std::move(Err), RightOp.takeError());
48     return std::move(Err);
49   }
50 
51   return EvalBinop(*LeftOp, *RightOp);
52 }
53 
getResult() const54 Expected<std::string> NumericSubstitution::getResult() const {
55   Expected<uint64_t> EvaluatedValue = ExpressionASTPointer->eval();
56   if (!EvaluatedValue)
57     return EvaluatedValue.takeError();
58   return utostr(*EvaluatedValue);
59 }
60 
getResult() const61 Expected<std::string> StringSubstitution::getResult() const {
62   // Look up the value and escape it so that we can put it into the regex.
63   Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
64   if (!VarVal)
65     return VarVal.takeError();
66   return Regex::escape(*VarVal);
67 }
68 
isValidVarNameStart(char C)69 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isalpha(C); }
70 
71 Expected<Pattern::VariableProperties>
parseVariable(StringRef & Str,const SourceMgr & SM)72 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
73   if (Str.empty())
74     return ErrorDiagnostic::get(SM, Str, "empty variable name");
75 
76   bool ParsedOneChar = false;
77   unsigned I = 0;
78   bool IsPseudo = Str[0] == '@';
79 
80   // Global vars start with '$'.
81   if (Str[0] == '$' || IsPseudo)
82     ++I;
83 
84   for (unsigned E = Str.size(); I != E; ++I) {
85     if (!ParsedOneChar && !isValidVarNameStart(Str[I]))
86       return ErrorDiagnostic::get(SM, Str, "invalid variable name");
87 
88     // Variable names are composed of alphanumeric characters and underscores.
89     if (Str[I] != '_' && !isalnum(Str[I]))
90       break;
91     ParsedOneChar = true;
92   }
93 
94   StringRef Name = Str.take_front(I);
95   Str = Str.substr(I);
96   return VariableProperties {Name, IsPseudo};
97 }
98 
// StringRef holding all characters considered as horizontal whitespace by
// FileCheck input canonicalization: the space and the horizontal tab. The
// expression parsing helpers in this file pass it to StringRef::ltrim() to
// skip whitespace around operands and operators.
constexpr StringLiteral SpaceChars = " \t";
102 
103 // Parsing helper function that strips the first character in S and returns it.
popFront(StringRef & S)104 static char popFront(StringRef &S) {
105   char C = S.front();
106   S = S.drop_front();
107   return C;
108 }
109 
// Out-of-line definitions of the static ID members of the error classes
// produced and handled in this file.
// NOTE(review): presumably these are the per-class type identification
// anchors required of llvm::ErrorInfo subclasses -- confirm against the class
// declarations, which are not part of this file.
char UndefVarError::ID = 0;
char ErrorDiagnostic::ID = 0;
char NotFoundError::ID = 0;
113 
parseNumericVariableDefinition(StringRef & Expr,FileCheckPatternContext * Context,Optional<size_t> LineNumber,const SourceMgr & SM)114 Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
115     StringRef &Expr, FileCheckPatternContext *Context,
116     Optional<size_t> LineNumber, const SourceMgr &SM) {
117   Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
118   if (!ParseVarResult)
119     return ParseVarResult.takeError();
120   StringRef Name = ParseVarResult->Name;
121 
122   if (ParseVarResult->IsPseudo)
123     return ErrorDiagnostic::get(
124         SM, Name, "definition of pseudo numeric variable unsupported");
125 
126   // Detect collisions between string and numeric variables when the latter
127   // is created later than the former.
128   if (Context->DefinedVariableTable.find(Name) !=
129       Context->DefinedVariableTable.end())
130     return ErrorDiagnostic::get(
131         SM, Name, "string variable with name '" + Name + "' already exists");
132 
133   Expr = Expr.ltrim(SpaceChars);
134   if (!Expr.empty())
135     return ErrorDiagnostic::get(
136         SM, Expr, "unexpected characters after numeric variable name");
137 
138   NumericVariable *DefinedNumericVariable;
139   auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
140   if (VarTableIter != Context->GlobalNumericVariableTable.end())
141     DefinedNumericVariable = VarTableIter->second;
142   else
143     DefinedNumericVariable = Context->makeNumericVariable(Name, LineNumber);
144 
145   return DefinedNumericVariable;
146 }
147 
parseNumericVariableUse(StringRef Name,bool IsPseudo,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)148 Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
149     StringRef Name, bool IsPseudo, Optional<size_t> LineNumber,
150     FileCheckPatternContext *Context, const SourceMgr &SM) {
151   if (IsPseudo && !Name.equals("@LINE"))
152     return ErrorDiagnostic::get(
153         SM, Name, "invalid pseudo numeric variable '" + Name + "'");
154 
155   // Numeric variable definitions and uses are parsed in the order in which
156   // they appear in the CHECK patterns. For each definition, the pointer to the
157   // class instance of the corresponding numeric variable definition is stored
158   // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
159   // we get below is null, it means no such variable was defined before. When
160   // that happens, we create a dummy variable so that parsing can continue. All
161   // uses of undefined variables, whether string or numeric, are then diagnosed
162   // in printSubstitutions() after failing to match.
163   auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
164   NumericVariable *NumericVariable;
165   if (VarTableIter != Context->GlobalNumericVariableTable.end())
166     NumericVariable = VarTableIter->second;
167   else {
168     NumericVariable = Context->makeNumericVariable(Name);
169     Context->GlobalNumericVariableTable[Name] = NumericVariable;
170   }
171 
172   Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
173   if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
174     return ErrorDiagnostic::get(
175         SM, Name,
176         "numeric variable '" + Name +
177             "' defined earlier in the same CHECK directive");
178 
179   return std::make_unique<NumericVariableUse>(Name, NumericVariable);
180 }
181 
parseNumericOperand(StringRef & Expr,AllowedOperand AO,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)182 Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
183     StringRef &Expr, AllowedOperand AO, Optional<size_t> LineNumber,
184     FileCheckPatternContext *Context, const SourceMgr &SM) {
185   if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
186     // Try to parse as a numeric variable use.
187     Expected<Pattern::VariableProperties> ParseVarResult =
188         parseVariable(Expr, SM);
189     if (ParseVarResult)
190       return parseNumericVariableUse(ParseVarResult->Name,
191                                      ParseVarResult->IsPseudo, LineNumber,
192                                      Context, SM);
193     if (AO == AllowedOperand::LineVar)
194       return ParseVarResult.takeError();
195     // Ignore the error and retry parsing as a literal.
196     consumeError(ParseVarResult.takeError());
197   }
198 
199   // Otherwise, parse it as a literal.
200   uint64_t LiteralValue;
201   if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue))
202     return std::make_unique<ExpressionLiteral>(LiteralValue);
203 
204   return ErrorDiagnostic::get(SM, Expr,
205                               "invalid operand format '" + Expr + "'");
206 }
207 
/// Evaluation function selected by parseBinop() for the '+' operator.
/// The operands are unsigned, so the sum wraps around modulo 2^64.
static uint64_t add(uint64_t LeftOp, uint64_t RightOp) {
  uint64_t Sum = LeftOp;
  Sum += RightOp;
  return Sum;
}
211 
/// Evaluation function selected by parseBinop() for the '-' operator.
/// The operands are unsigned, so the difference wraps around modulo 2^64 when
/// RightOp is larger than LeftOp.
static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) {
  uint64_t Difference = LeftOp;
  Difference -= RightOp;
  return Difference;
}
215 
216 Expected<std::unique_ptr<ExpressionAST>>
parseBinop(StringRef & Expr,std::unique_ptr<ExpressionAST> LeftOp,bool IsLegacyLineExpr,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)217 Pattern::parseBinop(StringRef &Expr, std::unique_ptr<ExpressionAST> LeftOp,
218                     bool IsLegacyLineExpr, Optional<size_t> LineNumber,
219                     FileCheckPatternContext *Context, const SourceMgr &SM) {
220   Expr = Expr.ltrim(SpaceChars);
221   if (Expr.empty())
222     return std::move(LeftOp);
223 
224   // Check if this is a supported operation and select a function to perform
225   // it.
226   SMLoc OpLoc = SMLoc::getFromPointer(Expr.data());
227   char Operator = popFront(Expr);
228   binop_eval_t EvalBinop;
229   switch (Operator) {
230   case '+':
231     EvalBinop = add;
232     break;
233   case '-':
234     EvalBinop = sub;
235     break;
236   default:
237     return ErrorDiagnostic::get(
238         SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
239   }
240 
241   // Parse right operand.
242   Expr = Expr.ltrim(SpaceChars);
243   if (Expr.empty())
244     return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");
245   // The second operand in a legacy @LINE expression is always a literal.
246   AllowedOperand AO =
247       IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any;
248   Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
249       parseNumericOperand(Expr, AO, LineNumber, Context, SM);
250   if (!RightOpResult)
251     return RightOpResult;
252 
253   Expr = Expr.ltrim(SpaceChars);
254   return std::make_unique<BinaryOperation>(EvalBinop, std::move(LeftOp),
255                                            std::move(*RightOpResult));
256 }
257 
parseNumericSubstitutionBlock(StringRef Expr,Optional<NumericVariable * > & DefinedNumericVariable,bool IsLegacyLineExpr,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)258 Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericSubstitutionBlock(
259     StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable,
260     bool IsLegacyLineExpr, Optional<size_t> LineNumber,
261     FileCheckPatternContext *Context, const SourceMgr &SM) {
262   std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
263   StringRef DefExpr = StringRef();
264   DefinedNumericVariable = None;
265   // Save variable definition expression if any.
266   size_t DefEnd = Expr.find(':');
267   if (DefEnd != StringRef::npos) {
268     DefExpr = Expr.substr(0, DefEnd);
269     Expr = Expr.substr(DefEnd + 1);
270   }
271 
272   // Parse the expression itself.
273   Expr = Expr.ltrim(SpaceChars);
274   if (!Expr.empty()) {
275     // The first operand in a legacy @LINE expression is always the @LINE
276     // pseudo variable.
277     AllowedOperand AO =
278         IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
279     Expected<std::unique_ptr<ExpressionAST>> ParseResult =
280         parseNumericOperand(Expr, AO, LineNumber, Context, SM);
281     while (ParseResult && !Expr.empty()) {
282       ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr,
283                                LineNumber, Context, SM);
284       // Legacy @LINE expressions only allow 2 operands.
285       if (ParseResult && IsLegacyLineExpr && !Expr.empty())
286         return ErrorDiagnostic::get(
287             SM, Expr,
288             "unexpected characters at end of expression '" + Expr + "'");
289     }
290     if (!ParseResult)
291       return ParseResult;
292     ExpressionASTPointer = std::move(*ParseResult);
293   }
294 
295   // Parse the numeric variable definition.
296   if (DefEnd != StringRef::npos) {
297     DefExpr = DefExpr.ltrim(SpaceChars);
298     Expected<NumericVariable *> ParseResult =
299         parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM);
300 
301     if (!ParseResult)
302       return ParseResult.takeError();
303     DefinedNumericVariable = *ParseResult;
304   }
305 
306   return std::move(ExpressionASTPointer);
307 }
308 
/// Parses the pattern in \p PatternStr and initializes this Pattern from it:
/// either FixedStr, for a pattern without any regex or substitution block, or
/// RegExStr together with the variable definitions (VariableDefs,
/// NumericVariableDefs) and Substitutions found in the pattern.
/// \p Prefix is the check prefix and is only used in diagnostics. \p Req
/// supplies the MatchFullLines, IgnoreCase and NoCanonicalizeWhiteSpace
/// options. Diagnostics are printed through \p SM, or to errs() for errors
/// returned by the parsing helpers.
/// \returns true if an error was diagnosed, false on success.
bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
                           SourceMgr &SM, const FileCheckRequest &Req) {
  // Full-line matching is never applied to CHECK-NOT patterns.
  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
  IgnoreCase = Req.IgnoreCase;

  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
    // Ignore trailing whitespace.
    while (!PatternStr.empty() &&
           (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
      PatternStr = PatternStr.substr(0, PatternStr.size() - 1);

  // Check that there is something on the line.
  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" + Prefix + ":'");
    return true;
  }

  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    SM.PrintMessage(
        PatternLoc, SourceMgr::DK_Error,
        "found non-empty check string for empty check with prefix '" + Prefix +
            ":'");
    return true;
  }

  // An empty check uses a fixed regex: a newline directly followed by the end
  // of a line. match() excludes that leading newline from the reported match
  // range.
  if (CheckTy == Check::CheckEmpty) {
    RegExStr = "(\n$)";
    return false;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
  if (!MatchFullLinesHere &&
      (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
                                 PatternStr.find("[[") == StringRef::npos))) {
    FixedStr = PatternStr;
    return false;
  }

  // Anchor the regex at the start of the line; unless whitespace
  // canonicalization is disabled, leading spaces are tolerated.
  if (MatchFullLinesHere) {
    RegExStr += '^';
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
  }

  // Paren value #0 is for the fully matched string.  Any new parenthesized
  // values add from there.
  unsigned CurParen = 1;

  // Otherwise, there is at least one regex piece.  Build up the regex pattern
  // by escaping scary characters in fixed strings, building up one big regex.
  while (!PatternStr.empty()) {
    // RegEx matches.
    if (PatternStr.startswith("{{")) {
      // This is the start of a regex match.  Scan for the }}.
      size_t End = PatternStr.find("}}");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "found start of regex string with no end '}}'");
        return true;
      }

      // Enclose {{}} patterns in parens just like [[]] even though we're not
      // capturing the result for any purpose.  This is required in case the
      // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
      RegExStr += '(';
      ++CurParen;

      if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
        return true;
      RegExStr += ')';

      PatternStr = PatternStr.substr(End + 2);
      continue;
    }

    // String and numeric substitution blocks. Pattern substitution blocks come
    // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
    // other regex) and assigns it to the string variable 'foo'. The latter
    // substitutes foo's value. Numeric substitution blocks recognize the same
    // form as string ones, but start with a '#' sign after the double
    // brackets. They also accept a combined form which sets a numeric variable
    // to the evaluation of an expression. Both string and numeric variable
    // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
    // valid, as this helps catch some common errors.
    if (PatternStr.startswith("[[")) {
      StringRef UnparsedPatternStr = PatternStr.substr(2);
      // Find the closing bracket pair ending the match.  End is going to be an
      // offset relative to the beginning of the match string.
      size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
      StringRef MatchStr = UnparsedPatternStr.substr(0, End);
      bool IsNumBlock = MatchStr.consume_front("#");

      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "Invalid substitution block, no ]] found");
        return true;
      }
      // Strip the substitution block we are parsing. End points to the start
      // of the "]]" closing the expression so account for it in computing the
      // index of the first unparsed character.
      PatternStr = UnparsedPatternStr.substr(End + 2);

      bool IsDefinition = false;
      bool SubstNeeded = false;
      // Whether the substitution block is a legacy use of @LINE with string
      // substitution block syntax.
      bool IsLegacyLineExpr = false;
      StringRef DefName;
      StringRef SubstStr;
      StringRef MatchRegexp;
      // Offset in RegExStr of this block; it is handed to the substitution
      // created below as its insertion index.
      size_t SubstInsertIdx = RegExStr.size();

      // Parse string variable or legacy @LINE expression.
      if (!IsNumBlock) {
        // No whitespace is allowed in the part preceding the ':' (or in the
        // whole block if there is no ':').
        size_t VarEndIdx = MatchStr.find(":");
        size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
        if (SpacePos != StringRef::npos) {
          SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
                          SourceMgr::DK_Error, "unexpected whitespace");
          return true;
        }

        // Get the name (e.g. "foo") and verify it is well formed.
        StringRef OrigMatchStr = MatchStr;
        Expected<Pattern::VariableProperties> ParseVarResult =
            parseVariable(MatchStr, SM);
        if (!ParseVarResult) {
          logAllUnhandledErrors(ParseVarResult.takeError(), errs());
          return true;
        }
        StringRef Name = ParseVarResult->Name;
        bool IsPseudo = ParseVarResult->IsPseudo;

        // A ':' anywhere in the block makes it a definition.
        IsDefinition = (VarEndIdx != StringRef::npos);
        SubstNeeded = !IsDefinition;
        if (IsDefinition) {
          // The ':' must come right after the name, and pseudo variables
          // cannot be defined.
          if ((IsPseudo || !MatchStr.consume_front(":"))) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "invalid name in string variable definition");
            return true;
          }

          // Detect collisions between string and numeric variables when the
          // former is created later than the latter.
          if (Context->GlobalNumericVariableTable.find(Name) !=
              Context->GlobalNumericVariableTable.end()) {
            SM.PrintMessage(
                SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                "numeric variable with name '" + Name + "' already exists");
            return true;
          }
          DefName = Name;
          MatchRegexp = MatchStr;
        } else {
          if (IsPseudo) {
            // Legacy @LINE expression: restore the unparsed text and handle
            // the block as a numeric substitution block below.
            MatchStr = OrigMatchStr;
            IsLegacyLineExpr = IsNumBlock = true;
          } else
            SubstStr = Name;
        }
      }

      // Parse numeric substitution block.
      std::unique_ptr<ExpressionAST> ExpressionASTPointer;
      Optional<NumericVariable *> DefinedNumericVariable;
      if (IsNumBlock) {
        Expected<std::unique_ptr<ExpressionAST>> ParseResult =
            parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
                                          IsLegacyLineExpr, LineNumber, Context,
                                          SM);
        if (!ParseResult) {
          logAllUnhandledErrors(ParseResult.takeError(), errs());
          return true;
        }
        ExpressionASTPointer = std::move(*ParseResult);
        // A substitution is needed only if the block contains an expression.
        SubstNeeded = ExpressionASTPointer != nullptr;
        if (DefinedNumericVariable) {
          IsDefinition = true;
          DefName = (*DefinedNumericVariable)->getName();
        }
        // Without an expression, the block matches any unsigned decimal
        // number.
        if (SubstNeeded)
          SubstStr = MatchStr;
        else
          MatchRegexp = "[0-9]+";
      }

      // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]].
      if (IsDefinition) {
        RegExStr += '(';
        // Account for the '(' just appended.
        ++SubstInsertIdx;

        if (IsNumBlock) {
          NumericVariableMatch NumericVariableDefinition = {
              *DefinedNumericVariable, CurParen};
          NumericVariableDefs[DefName] = NumericVariableDefinition;
          // This store is done here rather than in match() to allow
          // parseNumericVariableUse() to get the pointer to the class instance
          // of the right variable definition corresponding to a given numeric
          // variable use.
          Context->GlobalNumericVariableTable[DefName] =
              *DefinedNumericVariable;
        } else {
          VariableDefs[DefName] = CurParen;
          // Mark string variable as defined to detect collisions between
          // string and numeric variables in parseNumericVariableUse() and
          // defineCmdlineVariables() when the latter is created later than the
          // former. We cannot reuse GlobalVariableTable for this by populating
          // it with an empty string since we would then lose the ability to
          // detect the use of an undefined variable in match().
          Context->DefinedVariableTable[DefName] = true;
        }

        ++CurParen;
      }

      if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM))
        return true;

      if (IsDefinition)
        RegExStr += ')';

      // Handle substitutions: [[foo]] and [[#<foo expr>]].
      if (SubstNeeded) {
        // Handle substitution of string variables that were defined earlier on
        // the same line by emitting a backreference. Expressions do not
        // support substituting a numeric variable defined on the same line.
        if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) {
          unsigned CaptureParenGroup = VariableDefs[SubstStr];
          // AddBackrefToRegEx() emits a single digit after the backslash.
          if (CaptureParenGroup < 1 || CaptureParenGroup > 9) {
            SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()),
                            SourceMgr::DK_Error,
                            "Can't back-reference more than 9 variables");
            return true;
          }
          AddBackrefToRegEx(CaptureParenGroup);
        } else {
          // Handle substitution of string variables ([[<var>]]) defined in
          // previous CHECK patterns, and substitution of expressions.
          Substitution *Substitution =
              IsNumBlock
                  ? Context->makeNumericSubstitution(
                        SubstStr, std::move(ExpressionASTPointer),
                        SubstInsertIdx)
                  : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
          Substitutions.push_back(Substitution);
        }
      }
    }

    // Handle fixed string matches.
    // Find the end, which is the start of the next regex.
    size_t FixedMatchEnd = PatternStr.find("{{");
    FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
    PatternStr = PatternStr.substr(FixedMatchEnd);
  }

  // Anchor the regex at the end of the line; unless whitespace
  // canonicalization is disabled, trailing spaces are tolerated.
  if (MatchFullLinesHere) {
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
    RegExStr += '$';
  }

  return false;
}
581 
AddRegExToRegEx(StringRef RS,unsigned & CurParen,SourceMgr & SM)582 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
583   Regex R(RS);
584   std::string Error;
585   if (!R.isValid(Error)) {
586     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
587                     "invalid regex: " + Error);
588     return true;
589   }
590 
591   RegExStr += RS.str();
592   CurParen += R.getNumMatches();
593   return false;
594 }
595 
AddBackrefToRegEx(unsigned BackrefNum)596 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
597   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
598   std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
599   RegExStr += Backref;
600 }
601 
match(StringRef Buffer,size_t & MatchLen,const SourceMgr & SM) const602 Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen,
603                                 const SourceMgr &SM) const {
604   // If this is the EOF pattern, match it immediately.
605   if (CheckTy == Check::CheckEOF) {
606     MatchLen = 0;
607     return Buffer.size();
608   }
609 
610   // If this is a fixed string pattern, just match it now.
611   if (!FixedStr.empty()) {
612     MatchLen = FixedStr.size();
613     size_t Pos =
614         IgnoreCase ? Buffer.find_lower(FixedStr) : Buffer.find(FixedStr);
615     if (Pos == StringRef::npos)
616       return make_error<NotFoundError>();
617     return Pos;
618   }
619 
620   // Regex match.
621 
622   // If there are substitutions, we need to create a temporary string with the
623   // actual value.
624   StringRef RegExToMatch = RegExStr;
625   std::string TmpStr;
626   if (!Substitutions.empty()) {
627     TmpStr = RegExStr;
628     if (LineNumber)
629       Context->LineVariable->setValue(*LineNumber);
630 
631     size_t InsertOffset = 0;
632     // Substitute all string variables and expressions whose values are only
633     // now known. Use of string variables defined on the same line are handled
634     // by back-references.
635     for (const auto &Substitution : Substitutions) {
636       // Substitute and check for failure (e.g. use of undefined variable).
637       Expected<std::string> Value = Substitution->getResult();
638       if (!Value)
639         return Value.takeError();
640 
641       // Plop it into the regex at the adjusted offset.
642       TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset,
643                     Value->begin(), Value->end());
644       InsertOffset += Value->size();
645     }
646 
647     // Match the newly constructed regex.
648     RegExToMatch = TmpStr;
649   }
650 
651   SmallVector<StringRef, 4> MatchInfo;
652   unsigned int Flags = Regex::Newline;
653   if (IgnoreCase)
654     Flags |= Regex::IgnoreCase;
655   if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo))
656     return make_error<NotFoundError>();
657 
658   // Successful regex match.
659   assert(!MatchInfo.empty() && "Didn't get any match");
660   StringRef FullMatch = MatchInfo[0];
661 
662   // If this defines any string variables, remember their values.
663   for (const auto &VariableDef : VariableDefs) {
664     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
665     Context->GlobalVariableTable[VariableDef.first] =
666         MatchInfo[VariableDef.second];
667   }
668 
669   // If this defines any numeric variables, remember their values.
670   for (const auto &NumericVariableDef : NumericVariableDefs) {
671     const NumericVariableMatch &NumericVariableMatch =
672         NumericVariableDef.getValue();
673     unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup;
674     assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error");
675     NumericVariable *DefinedNumericVariable =
676         NumericVariableMatch.DefinedNumericVariable;
677 
678     StringRef MatchedValue = MatchInfo[CaptureParenGroup];
679     uint64_t Val;
680     if (MatchedValue.getAsInteger(10, Val))
681       return ErrorDiagnostic::get(SM, MatchedValue,
682                                   "Unable to represent numeric value");
683     DefinedNumericVariable->setValue(Val);
684   }
685 
686   // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
687   // the required preceding newline, which is consumed by the pattern in the
688   // case of CHECK-EMPTY but not CHECK-NEXT.
689   size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
690   MatchLen = FullMatch.size() - MatchStartSkip;
691   return FullMatch.data() - Buffer.data() + MatchStartSkip;
692 }
693 
computeMatchDistance(StringRef Buffer) const694 unsigned Pattern::computeMatchDistance(StringRef Buffer) const {
695   // Just compute the number of matching characters. For regular expressions, we
696   // just compare against the regex itself and hope for the best.
697   //
698   // FIXME: One easy improvement here is have the regex lib generate a single
699   // example regular expression which matches, and use that as the example
700   // string.
701   StringRef ExampleString(FixedStr);
702   if (ExampleString.empty())
703     ExampleString = RegExStr;
704 
705   // Only compare up to the first line in the buffer, or the string size.
706   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
707   BufferPrefix = BufferPrefix.split('\n').first;
708   return BufferPrefix.edit_distance(ExampleString);
709 }
710 
printSubstitutions(const SourceMgr & SM,StringRef Buffer,SMRange MatchRange) const711 void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
712                                  SMRange MatchRange) const {
713   // Print what we know about substitutions.
714   if (!Substitutions.empty()) {
715     for (const auto &Substitution : Substitutions) {
716       SmallString<256> Msg;
717       raw_svector_ostream OS(Msg);
718       Expected<std::string> MatchedValue = Substitution->getResult();
719 
720       // Substitution failed or is not known at match time, print the undefined
721       // variables it uses.
722       if (!MatchedValue) {
723         bool UndefSeen = false;
724         handleAllErrors(MatchedValue.takeError(), [](const NotFoundError &E) {},
725                         // Handled in PrintNoMatch().
726                         [](const ErrorDiagnostic &E) {},
727                         [&](const UndefVarError &E) {
728                           if (!UndefSeen) {
729                             OS << "uses undefined variable(s):";
730                             UndefSeen = true;
731                           }
732                           OS << " ";
733                           E.log(OS);
734                         });
735       } else {
736         // Substitution succeeded. Print substituted value.
737         OS << "with \"";
738         OS.write_escaped(Substitution->getFromString()) << "\" equal to \"";
739         OS.write_escaped(*MatchedValue) << "\"";
740       }
741 
742       if (MatchRange.isValid())
743         SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
744                         {MatchRange});
745       else
746         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
747                         SourceMgr::DK_Note, OS.str());
748     }
749   }
750 }
751 
ProcessMatchResult(FileCheckDiag::MatchType MatchTy,const SourceMgr & SM,SMLoc Loc,Check::FileCheckType CheckTy,StringRef Buffer,size_t Pos,size_t Len,std::vector<FileCheckDiag> * Diags,bool AdjustPrevDiag=false)752 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
753                                   const SourceMgr &SM, SMLoc Loc,
754                                   Check::FileCheckType CheckTy,
755                                   StringRef Buffer, size_t Pos, size_t Len,
756                                   std::vector<FileCheckDiag> *Diags,
757                                   bool AdjustPrevDiag = false) {
758   SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
759   SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
760   SMRange Range(Start, End);
761   if (Diags) {
762     if (AdjustPrevDiag)
763       Diags->rbegin()->MatchTy = MatchTy;
764     else
765       Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
766   }
767   return Range;
768 }
769 
printFuzzyMatch(const SourceMgr & SM,StringRef Buffer,std::vector<FileCheckDiag> * Diags) const770 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
771                               std::vector<FileCheckDiag> *Diags) const {
772   // Attempt to find the closest/best fuzzy match.  Usually an error happens
773   // because some string in the output didn't exactly match. In these cases, we
774   // would like to show the user a best guess at what "should have" matched, to
775   // save them having to actually check the input manually.
776   size_t NumLinesForward = 0;
777   size_t Best = StringRef::npos;
778   double BestQuality = 0;
779 
780   // Use an arbitrary 4k limit on how far we will search.
781   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
782     if (Buffer[i] == '\n')
783       ++NumLinesForward;
784 
785     // Patterns have leading whitespace stripped, so skip whitespace when
786     // looking for something which looks like a pattern.
787     if (Buffer[i] == ' ' || Buffer[i] == '\t')
788       continue;
789 
790     // Compute the "quality" of this match as an arbitrary combination of the
791     // match distance and the number of lines skipped to get to this match.
792     unsigned Distance = computeMatchDistance(Buffer.substr(i));
793     double Quality = Distance + (NumLinesForward / 100.);
794 
795     if (Quality < BestQuality || Best == StringRef::npos) {
796       Best = i;
797       BestQuality = Quality;
798     }
799   }
800 
801   // Print the "possible intended match here" line if we found something
802   // reasonable and not equal to what we showed in the "scanning from here"
803   // line.
804   if (Best && Best != StringRef::npos && BestQuality < 50) {
805     SMRange MatchRange =
806         ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(),
807                            getCheckTy(), Buffer, Best, 0, Diags);
808     SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note,
809                     "possible intended match here");
810 
811     // FIXME: If we wanted to be really friendly we would show why the match
812     // failed, as it can be hard to spot simple one character differences.
813   }
814 }
815 
816 Expected<StringRef>
getPatternVarValue(StringRef VarName)817 FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
818   auto VarIter = GlobalVariableTable.find(VarName);
819   if (VarIter == GlobalVariableTable.end())
820     return make_error<UndefVarError>(VarName);
821 
822   return VarIter->second;
823 }
824 
825 template <class... Types>
makeNumericVariable(Types...args)826 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... args) {
827   NumericVariables.push_back(std::make_unique<NumericVariable>(args...));
828   return NumericVariables.back().get();
829 }
830 
831 Substitution *
makeStringSubstitution(StringRef VarName,size_t InsertIdx)832 FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
833                                                 size_t InsertIdx) {
834   Substitutions.push_back(
835       std::make_unique<StringSubstitution>(this, VarName, InsertIdx));
836   return Substitutions.back().get();
837 }
838 
makeNumericSubstitution(StringRef ExpressionStr,std::unique_ptr<ExpressionAST> ExpressionASTPointer,size_t InsertIdx)839 Substitution *FileCheckPatternContext::makeNumericSubstitution(
840     StringRef ExpressionStr,
841     std::unique_ptr<ExpressionAST> ExpressionASTPointer, size_t InsertIdx) {
842   Substitutions.push_back(std::make_unique<NumericSubstitution>(
843       this, ExpressionStr, std::move(ExpressionASTPointer), InsertIdx));
844   return Substitutions.back().get();
845 }
846 
FindRegexVarEnd(StringRef Str,SourceMgr & SM)847 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
848   // Offset keeps track of the current offset within the input Str
849   size_t Offset = 0;
850   // [...] Nesting depth
851   size_t BracketDepth = 0;
852 
853   while (!Str.empty()) {
854     if (Str.startswith("]]") && BracketDepth == 0)
855       return Offset;
856     if (Str[0] == '\\') {
857       // Backslash escapes the next char within regexes, so skip them both.
858       Str = Str.substr(2);
859       Offset += 2;
860     } else {
861       switch (Str[0]) {
862       default:
863         break;
864       case '[':
865         BracketDepth++;
866         break;
867       case ']':
868         if (BracketDepth == 0) {
869           SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
870                           SourceMgr::DK_Error,
871                           "missing closing \"]\" for regex variable");
872           exit(1);
873         }
874         BracketDepth--;
875         break;
876       }
877       Str = Str.substr(1);
878       Offset++;
879     }
880   }
881 
882   return StringRef::npos;
883 }
884 
CanonicalizeFile(MemoryBuffer & MB,SmallVectorImpl<char> & OutputBuffer)885 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB,
886                                       SmallVectorImpl<char> &OutputBuffer) {
887   OutputBuffer.reserve(MB.getBufferSize());
888 
889   for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
890        Ptr != End; ++Ptr) {
891     // Eliminate trailing dosish \r.
892     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
893       continue;
894     }
895 
896     // If current char is not a horizontal whitespace or if horizontal
897     // whitespace canonicalization is disabled, dump it to output as is.
898     if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
899       OutputBuffer.push_back(*Ptr);
900       continue;
901     }
902 
903     // Otherwise, add one space and advance over neighboring space.
904     OutputBuffer.push_back(' ');
905     while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
906       ++Ptr;
907   }
908 
909   // Add a null byte and then return all but that byte.
910   OutputBuffer.push_back('\0');
911   return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
912 }
913 
FileCheckDiag(const SourceMgr & SM,const Check::FileCheckType & CheckTy,SMLoc CheckLoc,MatchType MatchTy,SMRange InputRange)914 FileCheckDiag::FileCheckDiag(const SourceMgr &SM,
915                              const Check::FileCheckType &CheckTy,
916                              SMLoc CheckLoc, MatchType MatchTy,
917                              SMRange InputRange)
918     : CheckTy(CheckTy), MatchTy(MatchTy) {
919   auto Start = SM.getLineAndColumn(InputRange.Start);
920   auto End = SM.getLineAndColumn(InputRange.End);
921   InputStartLine = Start.first;
922   InputStartCol = Start.second;
923   InputEndLine = End.first;
924   InputEndCol = End.second;
925   Start = SM.getLineAndColumn(CheckLoc);
926   CheckLine = Start.first;
927   CheckCol = Start.second;
928 }
929 
/// Returns true if \p c can be part of a check-prefix-like word, i.e. it is
/// alphanumeric, a hyphen or an underscore.
static bool IsPartOfWord(char c) {
  // isalnum() is only defined for values representable as unsigned char (or
  // EOF), so convert first; a plain char may be negative for non-ASCII bytes.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
933 
setCount(int C)934 Check::FileCheckType &Check::FileCheckType::setCount(int C) {
935   assert(Count > 0 && "zero and negative counts are not supported");
936   assert((C == 1 || Kind == CheckPlain) &&
937          "count supported only for plain CHECK directives");
938   Count = C;
939   return *this;
940 }
941 
getDescription(StringRef Prefix) const942 std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
943   switch (Kind) {
944   case Check::CheckNone:
945     return "invalid";
946   case Check::CheckPlain:
947     if (Count > 1)
948       return Prefix.str() + "-COUNT";
949     return Prefix;
950   case Check::CheckNext:
951     return Prefix.str() + "-NEXT";
952   case Check::CheckSame:
953     return Prefix.str() + "-SAME";
954   case Check::CheckNot:
955     return Prefix.str() + "-NOT";
956   case Check::CheckDAG:
957     return Prefix.str() + "-DAG";
958   case Check::CheckLabel:
959     return Prefix.str() + "-LABEL";
960   case Check::CheckEmpty:
961     return Prefix.str() + "-EMPTY";
962   case Check::CheckEOF:
963     return "implicit EOF";
964   case Check::CheckBadNot:
965     return "bad NOT";
966   case Check::CheckBadCount:
967     return "bad COUNT";
968   }
969   llvm_unreachable("unknown FileCheckType");
970 }
971 
972 static std::pair<Check::FileCheckType, StringRef>
FindCheckType(StringRef Buffer,StringRef Prefix)973 FindCheckType(StringRef Buffer, StringRef Prefix) {
974   if (Buffer.size() <= Prefix.size())
975     return {Check::CheckNone, StringRef()};
976 
977   char NextChar = Buffer[Prefix.size()];
978 
979   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
980   // Verify that the : is present after the prefix.
981   if (NextChar == ':')
982     return {Check::CheckPlain, Rest};
983 
984   if (NextChar != '-')
985     return {Check::CheckNone, StringRef()};
986 
987   if (Rest.consume_front("COUNT-")) {
988     int64_t Count;
989     if (Rest.consumeInteger(10, Count))
990       // Error happened in parsing integer.
991       return {Check::CheckBadCount, Rest};
992     if (Count <= 0 || Count > INT32_MAX)
993       return {Check::CheckBadCount, Rest};
994     if (!Rest.consume_front(":"))
995       return {Check::CheckBadCount, Rest};
996     return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest};
997   }
998 
999   if (Rest.consume_front("NEXT:"))
1000     return {Check::CheckNext, Rest};
1001 
1002   if (Rest.consume_front("SAME:"))
1003     return {Check::CheckSame, Rest};
1004 
1005   if (Rest.consume_front("NOT:"))
1006     return {Check::CheckNot, Rest};
1007 
1008   if (Rest.consume_front("DAG:"))
1009     return {Check::CheckDAG, Rest};
1010 
1011   if (Rest.consume_front("LABEL:"))
1012     return {Check::CheckLabel, Rest};
1013 
1014   if (Rest.consume_front("EMPTY:"))
1015     return {Check::CheckEmpty, Rest};
1016 
1017   // You can't combine -NOT with another suffix.
1018   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
1019       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
1020       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
1021       Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
1022     return {Check::CheckBadNot, Rest};
1023 
1024   return {Check::CheckNone, Rest};
1025 }
1026 
1027 // From the given position, find the next character after the word.
SkipWord(StringRef Str,size_t Loc)1028 static size_t SkipWord(StringRef Str, size_t Loc) {
1029   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
1030     ++Loc;
1031   return Loc;
1032 }
1033 
1034 /// Searches the buffer for the first prefix in the prefix regular expression.
1035 ///
1036 /// This searches the buffer using the provided regular expression, however it
1037 /// enforces constraints beyond that:
1038 /// 1) The found prefix must not be a suffix of something that looks like
1039 ///    a valid prefix.
1040 /// 2) The found prefix must be followed by a valid check type suffix using \c
1041 ///    FindCheckType above.
1042 ///
1043 /// \returns a pair of StringRefs into the Buffer, which combines:
1044 ///   - the first match of the regular expression to satisfy these two is
1045 ///   returned,
1046 ///     otherwise an empty StringRef is returned to indicate failure.
1047 ///   - buffer rewound to the location right after parsed suffix, for parsing
1048 ///     to continue from
1049 ///
1050 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
1051 /// start at the beginning of the returned prefix, increment \p LineNumber for
1052 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
1053 /// check found by examining the suffix.
1054 ///
1055 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
1056 /// is unspecified.
1057 static std::pair<StringRef, StringRef>
FindFirstMatchingPrefix(Regex & PrefixRE,StringRef & Buffer,unsigned & LineNumber,Check::FileCheckType & CheckTy)1058 FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
1059                         unsigned &LineNumber, Check::FileCheckType &CheckTy) {
1060   SmallVector<StringRef, 2> Matches;
1061 
1062   while (!Buffer.empty()) {
1063     // Find the first (longest) match using the RE.
1064     if (!PrefixRE.match(Buffer, &Matches))
1065       // No match at all, bail.
1066       return {StringRef(), StringRef()};
1067 
1068     StringRef Prefix = Matches[0];
1069     Matches.clear();
1070 
1071     assert(Prefix.data() >= Buffer.data() &&
1072            Prefix.data() < Buffer.data() + Buffer.size() &&
1073            "Prefix doesn't start inside of buffer!");
1074     size_t Loc = Prefix.data() - Buffer.data();
1075     StringRef Skipped = Buffer.substr(0, Loc);
1076     Buffer = Buffer.drop_front(Loc);
1077     LineNumber += Skipped.count('\n');
1078 
1079     // Check that the matched prefix isn't a suffix of some other check-like
1080     // word.
1081     // FIXME: This is a very ad-hoc check. it would be better handled in some
1082     // other way. Among other things it seems hard to distinguish between
1083     // intentional and unintentional uses of this feature.
1084     if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
1085       // Now extract the type.
1086       StringRef AfterSuffix;
1087       std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix);
1088 
1089       // If we've found a valid check type for this prefix, we're done.
1090       if (CheckTy != Check::CheckNone)
1091         return {Prefix, AfterSuffix};
1092     }
1093 
1094     // If we didn't successfully find a prefix, we need to skip this invalid
1095     // prefix and continue scanning. We directly skip the prefix that was
1096     // matched and any additional parts of that check-like word.
1097     Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
1098   }
1099 
1100   // We ran out of buffer while skipping partial matches so give up.
1101   return {StringRef(), StringRef()};
1102 }
1103 
createLineVariable()1104 void FileCheckPatternContext::createLineVariable() {
1105   assert(!LineVariable && "@LINE pseudo numeric variable already created");
1106   StringRef LineName = "@LINE";
1107   LineVariable = makeNumericVariable(LineName);
1108   GlobalNumericVariableTable[LineName] = LineVariable;
1109 }
1110 
/// Creates a FileCheck instance for request \p Req, with a freshly allocated
/// pattern context and an empty list of check strings.
FileCheck::FileCheck(FileCheckRequest Req)
    : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()),
      CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {}

// Defaulted out of line.
// NOTE(review): presumably so the owned types only need to be complete in
// this file - confirm against the header.
FileCheck::~FileCheck() = default;
1116 
readCheckFile(SourceMgr & SM,StringRef Buffer,Regex & PrefixRE)1117 bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer,
1118                               Regex &PrefixRE) {
1119   Error DefineError =
1120       PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM);
1121   if (DefineError) {
1122     logAllUnhandledErrors(std::move(DefineError), errs());
1123     return true;
1124   }
1125 
1126   PatternContext->createLineVariable();
1127 
1128   std::vector<Pattern> ImplicitNegativeChecks;
1129   for (const auto &PatternString : Req.ImplicitCheckNot) {
1130     // Create a buffer with fake command line content in order to display the
1131     // command line option responsible for the specific implicit CHECK-NOT.
1132     std::string Prefix = "-implicit-check-not='";
1133     std::string Suffix = "'";
1134     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
1135         Prefix + PatternString + Suffix, "command line");
1136 
1137     StringRef PatternInBuffer =
1138         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
1139     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
1140 
1141     ImplicitNegativeChecks.push_back(
1142         Pattern(Check::CheckNot, PatternContext.get()));
1143     ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
1144                                                "IMPLICIT-CHECK", SM, Req);
1145   }
1146 
1147   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
1148 
1149   // LineNumber keeps track of the line on which CheckPrefix instances are
1150   // found.
1151   unsigned LineNumber = 1;
1152 
1153   while (1) {
1154     Check::FileCheckType CheckTy;
1155 
1156     // See if a prefix occurs in the memory buffer.
1157     StringRef UsedPrefix;
1158     StringRef AfterSuffix;
1159     std::tie(UsedPrefix, AfterSuffix) =
1160         FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy);
1161     if (UsedPrefix.empty())
1162       break;
1163     assert(UsedPrefix.data() == Buffer.data() &&
1164            "Failed to move Buffer's start forward, or pointed prefix outside "
1165            "of the buffer!");
1166     assert(AfterSuffix.data() >= Buffer.data() &&
1167            AfterSuffix.data() < Buffer.data() + Buffer.size() &&
1168            "Parsing after suffix doesn't start inside of buffer!");
1169 
1170     // Location to use for error messages.
1171     const char *UsedPrefixStart = UsedPrefix.data();
1172 
1173     // Skip the buffer to the end of parsed suffix (or just prefix, if no good
1174     // suffix was processed).
1175     Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
1176                                  : AfterSuffix;
1177 
1178     // Complain about useful-looking but unsupported suffixes.
1179     if (CheckTy == Check::CheckBadNot) {
1180       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
1181                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
1182       return true;
1183     }
1184 
1185     // Complain about invalid count specification.
1186     if (CheckTy == Check::CheckBadCount) {
1187       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
1188                       "invalid count in -COUNT specification on prefix '" +
1189                           UsedPrefix + "'");
1190       return true;
1191     }
1192 
1193     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
1194     // leading whitespace.
1195     if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
1196       Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
1197 
1198     // Scan ahead to the end of line.
1199     size_t EOL = Buffer.find_first_of("\n\r");
1200 
1201     // Remember the location of the start of the pattern, for diagnostics.
1202     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
1203 
1204     // Parse the pattern.
1205     Pattern P(CheckTy, PatternContext.get(), LineNumber);
1206     if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req))
1207       return true;
1208 
1209     // Verify that CHECK-LABEL lines do not define or use variables
1210     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
1211       SM.PrintMessage(
1212           SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
1213           "found '" + UsedPrefix + "-LABEL:'"
1214                                    " with variable definition or use");
1215       return true;
1216     }
1217 
1218     Buffer = Buffer.substr(EOL);
1219 
1220     // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
1221     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
1222          CheckTy == Check::CheckEmpty) &&
1223         CheckStrings->empty()) {
1224       StringRef Type = CheckTy == Check::CheckNext
1225                            ? "NEXT"
1226                            : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
1227       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
1228                       SourceMgr::DK_Error,
1229                       "found '" + UsedPrefix + "-" + Type +
1230                           "' without previous '" + UsedPrefix + ": line");
1231       return true;
1232     }
1233 
1234     // Handle CHECK-DAG/-NOT.
1235     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
1236       DagNotMatches.push_back(P);
1237       continue;
1238     }
1239 
1240     // Okay, add the string we captured to the output vector and move on.
1241     CheckStrings->emplace_back(P, UsedPrefix, PatternLoc);
1242     std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
1243     DagNotMatches = ImplicitNegativeChecks;
1244   }
1245 
1246   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
1247   // prefix as a filler for the error message.
1248   if (!DagNotMatches.empty()) {
1249     CheckStrings->emplace_back(
1250         Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1),
1251         *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
1252     std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
1253   }
1254 
1255   if (CheckStrings->empty()) {
1256     errs() << "error: no check strings found with prefix"
1257            << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
1258     auto I = Req.CheckPrefixes.begin();
1259     auto E = Req.CheckPrefixes.end();
1260     if (I != E) {
1261       errs() << "\'" << *I << ":'";
1262       ++I;
1263     }
1264     for (; I != E; ++I)
1265       errs() << ", \'" << *I << ":'";
1266 
1267     errs() << '\n';
1268     return true;
1269   }
1270 
1271   return false;
1272 }
1273 
PrintMatch(bool ExpectedMatch,const SourceMgr & SM,StringRef Prefix,SMLoc Loc,const Pattern & Pat,int MatchedCount,StringRef Buffer,size_t MatchPos,size_t MatchLen,const FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags)1274 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1275                        StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1276                        int MatchedCount, StringRef Buffer, size_t MatchPos,
1277                        size_t MatchLen, const FileCheckRequest &Req,
1278                        std::vector<FileCheckDiag> *Diags) {
1279   bool PrintDiag = true;
1280   if (ExpectedMatch) {
1281     if (!Req.Verbose)
1282       return;
1283     if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
1284       return;
1285     // Due to their verbosity, we don't print verbose diagnostics here if we're
1286     // gathering them for a different rendering, but we always print other
1287     // diagnostics.
1288     PrintDiag = !Diags;
1289   }
1290   SMRange MatchRange = ProcessMatchResult(
1291       ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
1292                     : FileCheckDiag::MatchFoundButExcluded,
1293       SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
1294   if (!PrintDiag)
1295     return;
1296 
1297   std::string Message = formatv("{0}: {1} string found in input",
1298                                 Pat.getCheckTy().getDescription(Prefix),
1299                                 (ExpectedMatch ? "expected" : "excluded"))
1300                             .str();
1301   if (Pat.getCount() > 1)
1302     Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
1303 
1304   SM.PrintMessage(
1305       Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
1306   SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
1307                   {MatchRange});
1308   Pat.printSubstitutions(SM, Buffer, MatchRange);
1309 }
1310 
PrintMatch(bool ExpectedMatch,const SourceMgr & SM,const FileCheckString & CheckStr,int MatchedCount,StringRef Buffer,size_t MatchPos,size_t MatchLen,FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags)1311 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1312                        const FileCheckString &CheckStr, int MatchedCount,
1313                        StringRef Buffer, size_t MatchPos, size_t MatchLen,
1314                        FileCheckRequest &Req,
1315                        std::vector<FileCheckDiag> *Diags) {
1316   PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1317              MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags);
1318 }
1319 
PrintNoMatch(bool ExpectedMatch,const SourceMgr & SM,StringRef Prefix,SMLoc Loc,const Pattern & Pat,int MatchedCount,StringRef Buffer,bool VerboseVerbose,std::vector<FileCheckDiag> * Diags,Error MatchErrors)1320 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1321                          StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1322                          int MatchedCount, StringRef Buffer,
1323                          bool VerboseVerbose, std::vector<FileCheckDiag> *Diags,
1324                          Error MatchErrors) {
1325   assert(MatchErrors && "Called on successful match");
1326   bool PrintDiag = true;
1327   if (!ExpectedMatch) {
1328     if (!VerboseVerbose) {
1329       consumeError(std::move(MatchErrors));
1330       return;
1331     }
1332     // Due to their verbosity, we don't print verbose diagnostics here if we're
1333     // gathering them for a different rendering, but we always print other
1334     // diagnostics.
1335     PrintDiag = !Diags;
1336   }
1337 
1338   // If the current position is at the end of a line, advance to the start of
1339   // the next line.
1340   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1341   SMRange SearchRange = ProcessMatchResult(
1342       ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
1343                     : FileCheckDiag::MatchNoneAndExcluded,
1344       SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
1345   if (!PrintDiag) {
1346     consumeError(std::move(MatchErrors));
1347     return;
1348   }
1349 
1350   MatchErrors = handleErrors(std::move(MatchErrors),
1351                              [](const ErrorDiagnostic &E) { E.log(errs()); });
1352 
1353   // No problem matching the string per se.
1354   if (!MatchErrors)
1355     return;
1356   consumeError(std::move(MatchErrors));
1357 
1358   // Print "not found" diagnostic.
1359   std::string Message = formatv("{0}: {1} string not found in input",
1360                                 Pat.getCheckTy().getDescription(Prefix),
1361                                 (ExpectedMatch ? "expected" : "excluded"))
1362                             .str();
1363   if (Pat.getCount() > 1)
1364     Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
1365   SM.PrintMessage(
1366       Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);
1367 
1368   // Print the "scanning from here" line.
1369   SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
1370 
1371   // Allow the pattern to print additional information if desired.
1372   Pat.printSubstitutions(SM, Buffer);
1373 
1374   if (ExpectedMatch)
1375     Pat.printFuzzyMatch(SM, Buffer, Diags);
1376 }
1377 
PrintNoMatch(bool ExpectedMatch,const SourceMgr & SM,const FileCheckString & CheckStr,int MatchedCount,StringRef Buffer,bool VerboseVerbose,std::vector<FileCheckDiag> * Diags,Error MatchErrors)1378 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1379                          const FileCheckString &CheckStr, int MatchedCount,
1380                          StringRef Buffer, bool VerboseVerbose,
1381                          std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
1382   PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1383                MatchedCount, Buffer, VerboseVerbose, Diags,
1384                std::move(MatchErrors));
1385 }
1386 
1387 /// Counts the number of newlines in the specified range.
CountNumNewlinesBetween(StringRef Range,const char * & FirstNewLine)1388 static unsigned CountNumNewlinesBetween(StringRef Range,
1389                                         const char *&FirstNewLine) {
1390   unsigned NumNewLines = 0;
1391   while (1) {
1392     // Scan for newline.
1393     Range = Range.substr(Range.find_first_of("\n\r"));
1394     if (Range.empty())
1395       return NumNewLines;
1396 
1397     ++NumNewLines;
1398 
1399     // Handle \n\r and \r\n as a single newline.
1400     if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1401         (Range[0] != Range[1]))
1402       Range = Range.substr(1);
1403     Range = Range.substr(1);
1404 
1405     if (NumNewLines == 1)
1406       FirstNewLine = Range.begin();
1407   }
1408 }
1409 
Check(const SourceMgr & SM,StringRef Buffer,bool IsLabelScanMode,size_t & MatchLen,FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags) const1410 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
1411                               bool IsLabelScanMode, size_t &MatchLen,
1412                               FileCheckRequest &Req,
1413                               std::vector<FileCheckDiag> *Diags) const {
1414   size_t LastPos = 0;
1415   std::vector<const Pattern *> NotStrings;
1416 
1417   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1418   // bounds; we have not processed variable definitions within the bounded block
1419   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1420   // over the block again (including the last CHECK-LABEL) in normal mode.
1421   if (!IsLabelScanMode) {
1422     // Match "dag strings" (with mixed "not strings" if any).
1423     LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags);
1424     if (LastPos == StringRef::npos)
1425       return StringRef::npos;
1426   }
1427 
1428   // Match itself from the last position after matching CHECK-DAG.
1429   size_t LastMatchEnd = LastPos;
1430   size_t FirstMatchPos = 0;
1431   // Go match the pattern Count times. Majority of patterns only match with
1432   // count 1 though.
1433   assert(Pat.getCount() != 0 && "pattern count can not be zero");
1434   for (int i = 1; i <= Pat.getCount(); i++) {
1435     StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
1436     size_t CurrentMatchLen;
1437     // get a match at current start point
1438     Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM);
1439 
1440     // report
1441     if (!MatchResult) {
1442       PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags,
1443                    MatchResult.takeError());
1444       return StringRef::npos;
1445     }
1446     size_t MatchPos = *MatchResult;
1447     PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req,
1448                Diags);
1449     if (i == 1)
1450       FirstMatchPos = LastPos + MatchPos;
1451 
1452     // move start point after the match
1453     LastMatchEnd += MatchPos + CurrentMatchLen;
1454   }
1455   // Full match len counts from first match pos.
1456   MatchLen = LastMatchEnd - FirstMatchPos;
1457 
1458   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1459   // or CHECK-NOT
1460   if (!IsLabelScanMode) {
1461     size_t MatchPos = FirstMatchPos - LastPos;
1462     StringRef MatchBuffer = Buffer.substr(LastPos);
1463     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1464 
1465     // If this check is a "CHECK-NEXT", verify that the previous match was on
1466     // the previous line (i.e. that there is one newline between them).
1467     if (CheckNext(SM, SkippedRegion)) {
1468       ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
1469                          Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
1470                          Diags, Req.Verbose);
1471       return StringRef::npos;
1472     }
1473 
1474     // If this check is a "CHECK-SAME", verify that the previous match was on
1475     // the same line (i.e. that there is no newline between them).
1476     if (CheckSame(SM, SkippedRegion)) {
1477       ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
1478                          Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
1479                          Diags, Req.Verbose);
1480       return StringRef::npos;
1481     }
1482 
1483     // If this match had "not strings", verify that they don't exist in the
1484     // skipped region.
1485     if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
1486       return StringRef::npos;
1487   }
1488 
1489   return FirstMatchPos;
1490 }
1491 
CheckNext(const SourceMgr & SM,StringRef Buffer) const1492 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1493   if (Pat.getCheckTy() != Check::CheckNext &&
1494       Pat.getCheckTy() != Check::CheckEmpty)
1495     return false;
1496 
1497   Twine CheckName =
1498       Prefix +
1499       Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
1500 
1501   // Count the number of newlines between the previous match and this one.
1502   const char *FirstNewLine = nullptr;
1503   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1504 
1505   if (NumNewLines == 0) {
1506     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1507                     CheckName + ": is on the same line as previous match");
1508     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1509                     "'next' match was here");
1510     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1511                     "previous match ended here");
1512     return true;
1513   }
1514 
1515   if (NumNewLines != 1) {
1516     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1517                     CheckName +
1518                         ": is not on the line after the previous match");
1519     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1520                     "'next' match was here");
1521     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1522                     "previous match ended here");
1523     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1524                     "non-matching line after previous match is here");
1525     return true;
1526   }
1527 
1528   return false;
1529 }
1530 
CheckSame(const SourceMgr & SM,StringRef Buffer) const1531 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1532   if (Pat.getCheckTy() != Check::CheckSame)
1533     return false;
1534 
1535   // Count the number of newlines between the previous match and this one.
1536   const char *FirstNewLine = nullptr;
1537   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1538 
1539   if (NumNewLines != 0) {
1540     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1541                     Prefix +
1542                         "-SAME: is not on the same line as the previous match");
1543     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1544                     "'next' match was here");
1545     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1546                     "previous match ended here");
1547     return true;
1548   }
1549 
1550   return false;
1551 }
1552 
CheckNot(const SourceMgr & SM,StringRef Buffer,const std::vector<const Pattern * > & NotStrings,const FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags) const1553 bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1554                                const std::vector<const Pattern *> &NotStrings,
1555                                const FileCheckRequest &Req,
1556                                std::vector<FileCheckDiag> *Diags) const {
1557   for (const Pattern *Pat : NotStrings) {
1558     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1559 
1560     size_t MatchLen = 0;
1561     Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM);
1562 
1563     if (!MatchResult) {
1564       PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
1565                    Req.VerboseVerbose, Diags, MatchResult.takeError());
1566       continue;
1567     }
1568     size_t Pos = *MatchResult;
1569 
1570     PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen,
1571                Req, Diags);
1572 
1573     return true;
1574   }
1575 
1576   return false;
1577 }
1578 
CheckDag(const SourceMgr & SM,StringRef Buffer,std::vector<const Pattern * > & NotStrings,const FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags) const1579 size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1580                                  std::vector<const Pattern *> &NotStrings,
1581                                  const FileCheckRequest &Req,
1582                                  std::vector<FileCheckDiag> *Diags) const {
1583   if (DagNotStrings.empty())
1584     return 0;
1585 
1586   // The start of the search range.
1587   size_t StartPos = 0;
1588 
1589   struct MatchRange {
1590     size_t Pos;
1591     size_t End;
1592   };
1593   // A sorted list of ranges for non-overlapping CHECK-DAG matches.  Match
1594   // ranges are erased from this list once they are no longer in the search
1595   // range.
1596   std::list<MatchRange> MatchRanges;
1597 
1598   // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
1599   // group, so we don't use a range-based for loop here.
1600   for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
1601        PatItr != PatEnd; ++PatItr) {
1602     const Pattern &Pat = *PatItr;
1603     assert((Pat.getCheckTy() == Check::CheckDAG ||
1604             Pat.getCheckTy() == Check::CheckNot) &&
1605            "Invalid CHECK-DAG or CHECK-NOT!");
1606 
1607     if (Pat.getCheckTy() == Check::CheckNot) {
1608       NotStrings.push_back(&Pat);
1609       continue;
1610     }
1611 
1612     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1613 
1614     // CHECK-DAG always matches from the start.
1615     size_t MatchLen = 0, MatchPos = StartPos;
1616 
1617     // Search for a match that doesn't overlap a previous match in this
1618     // CHECK-DAG group.
1619     for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
1620       StringRef MatchBuffer = Buffer.substr(MatchPos);
1621       Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM);
1622       // With a group of CHECK-DAGs, a single mismatching means the match on
1623       // that group of CHECK-DAGs fails immediately.
1624       if (!MatchResult) {
1625         PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
1626                      Req.VerboseVerbose, Diags, MatchResult.takeError());
1627         return StringRef::npos;
1628       }
1629       size_t MatchPosBuf = *MatchResult;
1630       // Re-calc it as the offset relative to the start of the original string.
1631       MatchPos += MatchPosBuf;
1632       if (Req.VerboseVerbose)
1633         PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
1634                    MatchLen, Req, Diags);
1635       MatchRange M{MatchPos, MatchPos + MatchLen};
1636       if (Req.AllowDeprecatedDagOverlap) {
1637         // We don't need to track all matches in this mode, so we just maintain
1638         // one match range that encompasses the current CHECK-DAG group's
1639         // matches.
1640         if (MatchRanges.empty())
1641           MatchRanges.insert(MatchRanges.end(), M);
1642         else {
1643           auto Block = MatchRanges.begin();
1644           Block->Pos = std::min(Block->Pos, M.Pos);
1645           Block->End = std::max(Block->End, M.End);
1646         }
1647         break;
1648       }
1649       // Iterate previous matches until overlapping match or insertion point.
1650       bool Overlap = false;
1651       for (; MI != ME; ++MI) {
1652         if (M.Pos < MI->End) {
1653           // !Overlap => New match has no overlap and is before this old match.
1654           // Overlap => New match overlaps this old match.
1655           Overlap = MI->Pos < M.End;
1656           break;
1657         }
1658       }
1659       if (!Overlap) {
1660         // Insert non-overlapping match into list.
1661         MatchRanges.insert(MI, M);
1662         break;
1663       }
1664       if (Req.VerboseVerbose) {
1665         // Due to their verbosity, we don't print verbose diagnostics here if
1666         // we're gathering them for a different rendering, but we always print
1667         // other diagnostics.
1668         if (!Diags) {
1669           SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
1670           SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
1671           SMRange OldRange(OldStart, OldEnd);
1672           SM.PrintMessage(OldStart, SourceMgr::DK_Note,
1673                           "match discarded, overlaps earlier DAG match here",
1674                           {OldRange});
1675         } else
1676           Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
1677       }
1678       MatchPos = MI->End;
1679     }
1680     if (!Req.VerboseVerbose)
1681       PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
1682                  MatchLen, Req, Diags);
1683 
1684     // Handle the end of a CHECK-DAG group.
1685     if (std::next(PatItr) == PatEnd ||
1686         std::next(PatItr)->getCheckTy() == Check::CheckNot) {
1687       if (!NotStrings.empty()) {
1688         // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
1689         // CHECK-DAG, verify that there are no 'not' strings occurred in that
1690         // region.
1691         StringRef SkippedRegion =
1692             Buffer.slice(StartPos, MatchRanges.begin()->Pos);
1693         if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
1694           return StringRef::npos;
1695         // Clear "not strings".
1696         NotStrings.clear();
1697       }
1698       // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
1699       // end of this CHECK-DAG group's match range.
1700       StartPos = MatchRanges.rbegin()->End;
1701       // Don't waste time checking for (impossible) overlaps before that.
1702       MatchRanges.clear();
1703     }
1704   }
1705 
1706   return StartPos;
1707 }
1708 
1709 // A check prefix must contain only alphanumeric, hyphens and underscores.
ValidateCheckPrefix(StringRef CheckPrefix)1710 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1711   static const Regex Validator("^[a-zA-Z0-9_-]*$");
1712   return Validator.match(CheckPrefix);
1713 }
1714 
ValidateCheckPrefixes()1715 bool FileCheck::ValidateCheckPrefixes() {
1716   StringSet<> PrefixSet;
1717 
1718   for (StringRef Prefix : Req.CheckPrefixes) {
1719     // Reject empty prefixes.
1720     if (Prefix == "")
1721       return false;
1722 
1723     if (!PrefixSet.insert(Prefix).second)
1724       return false;
1725 
1726     if (!ValidateCheckPrefix(Prefix))
1727       return false;
1728   }
1729 
1730   return true;
1731 }
1732 
buildCheckPrefixRegex()1733 Regex FileCheck::buildCheckPrefixRegex() {
1734   // I don't think there's a way to specify an initial value for cl::list,
1735   // so if nothing was specified, add the default
1736   if (Req.CheckPrefixes.empty())
1737     Req.CheckPrefixes.push_back("CHECK");
1738 
1739   // We already validated the contents of CheckPrefixes so just concatenate
1740   // them as alternatives.
1741   SmallString<32> PrefixRegexStr;
1742   for (StringRef Prefix : Req.CheckPrefixes) {
1743     if (Prefix != Req.CheckPrefixes.front())
1744       PrefixRegexStr.push_back('|');
1745 
1746     PrefixRegexStr.append(Prefix);
1747   }
1748 
1749   return Regex(PrefixRegexStr);
1750 }
1751 
defineCmdlineVariables(std::vector<std::string> & CmdlineDefines,SourceMgr & SM)1752 Error FileCheckPatternContext::defineCmdlineVariables(
1753     std::vector<std::string> &CmdlineDefines, SourceMgr &SM) {
1754   assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
1755          "Overriding defined variable with command-line variable definitions");
1756 
1757   if (CmdlineDefines.empty())
1758     return Error::success();
1759 
1760   // Create a string representing the vector of command-line definitions. Each
1761   // definition is on its own line and prefixed with a definition number to
1762   // clarify which definition a given diagnostic corresponds to.
1763   unsigned I = 0;
1764   Error Errs = Error::success();
1765   std::string CmdlineDefsDiag;
1766   SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices;
1767   for (StringRef CmdlineDef : CmdlineDefines) {
1768     std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str();
1769     size_t EqIdx = CmdlineDef.find('=');
1770     if (EqIdx == StringRef::npos) {
1771       CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0));
1772       continue;
1773     }
1774     // Numeric variable definition.
1775     if (CmdlineDef[0] == '#') {
1776       // Append a copy of the command-line definition adapted to use the same
1777       // format as in the input file to be able to reuse
1778       // parseNumericSubstitutionBlock.
1779       CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str();
1780       std::string SubstitutionStr = CmdlineDef;
1781       SubstitutionStr[EqIdx] = ':';
1782       CmdlineDefsIndices.push_back(
1783           std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size()));
1784       CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str();
1785     } else {
1786       CmdlineDefsDiag += DefPrefix;
1787       CmdlineDefsIndices.push_back(
1788           std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size()));
1789       CmdlineDefsDiag += (CmdlineDef + "\n").str();
1790     }
1791   }
1792 
1793   // Create a buffer with fake command line content in order to display
1794   // parsing diagnostic with location information and point to the
1795   // global definition with invalid syntax.
1796   std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
1797       MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
1798   StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
1799   SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
1800 
1801   for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) {
1802     StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first,
1803                                                      CmdlineDefIndices.second);
1804     if (CmdlineDef.empty()) {
1805       Errs = joinErrors(
1806           std::move(Errs),
1807           ErrorDiagnostic::get(SM, CmdlineDef,
1808                                "missing equal sign in global definition"));
1809       continue;
1810     }
1811 
1812     // Numeric variable definition.
1813     if (CmdlineDef[0] == '#') {
1814       // Now parse the definition both to check that the syntax is correct and
1815       // to create the necessary class instance.
1816       StringRef CmdlineDefExpr = CmdlineDef.substr(1);
1817       Optional<NumericVariable *> DefinedNumericVariable;
1818       Expected<std::unique_ptr<ExpressionAST>> ExpressionASTResult =
1819           Pattern::parseNumericSubstitutionBlock(
1820               CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM);
1821       if (!ExpressionASTResult) {
1822         Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError());
1823         continue;
1824       }
1825       std::unique_ptr<ExpressionAST> ExpressionASTPointer =
1826           std::move(*ExpressionASTResult);
1827       // Now evaluate the expression whose value this variable should be set
1828       // to, since the expression of a command-line variable definition should
1829       // only use variables defined earlier on the command-line. If not, this
1830       // is an error and we report it.
1831       Expected<uint64_t> Value = ExpressionASTPointer->eval();
1832       if (!Value) {
1833         Errs = joinErrors(std::move(Errs), Value.takeError());
1834         continue;
1835       }
1836 
1837       assert(DefinedNumericVariable && "No variable defined");
1838       (*DefinedNumericVariable)->setValue(*Value);
1839 
1840       // Record this variable definition.
1841       GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] =
1842           *DefinedNumericVariable;
1843     } else {
1844       // String variable definition.
1845       std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
1846       StringRef CmdlineName = CmdlineNameVal.first;
1847       StringRef OrigCmdlineName = CmdlineName;
1848       Expected<Pattern::VariableProperties> ParseVarResult =
1849           Pattern::parseVariable(CmdlineName, SM);
1850       if (!ParseVarResult) {
1851         Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
1852         continue;
1853       }
1854       // Check that CmdlineName does not denote a pseudo variable is only
1855       // composed of the parsed numeric variable. This catches cases like
1856       // "FOO+2" in a "FOO+2=10" definition.
1857       if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
1858         Errs = joinErrors(std::move(Errs),
1859                           ErrorDiagnostic::get(
1860                               SM, OrigCmdlineName,
1861                               "invalid name in string variable definition '" +
1862                                   OrigCmdlineName + "'"));
1863         continue;
1864       }
1865       StringRef Name = ParseVarResult->Name;
1866 
1867       // Detect collisions between string and numeric variables when the former
1868       // is created later than the latter.
1869       if (GlobalNumericVariableTable.find(Name) !=
1870           GlobalNumericVariableTable.end()) {
1871         Errs = joinErrors(std::move(Errs),
1872                           ErrorDiagnostic::get(SM, Name,
1873                                                "numeric variable with name '" +
1874                                                    Name + "' already exists"));
1875         continue;
1876       }
1877       GlobalVariableTable.insert(CmdlineNameVal);
1878       // Mark the string variable as defined to detect collisions between
1879       // string and numeric variables in defineCmdlineVariables when the latter
1880       // is created later than the former. We cannot reuse GlobalVariableTable
1881       // for this by populating it with an empty string since we would then
1882       // lose the ability to detect the use of an undefined variable in
1883       // match().
1884       DefinedVariableTable[Name] = true;
1885     }
1886   }
1887 
1888   return Errs;
1889 }
1890 
clearLocalVars()1891 void FileCheckPatternContext::clearLocalVars() {
1892   SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars;
1893   for (const StringMapEntry<StringRef> &Var : GlobalVariableTable)
1894     if (Var.first()[0] != '$')
1895       LocalPatternVars.push_back(Var.first());
1896 
1897   // Numeric substitution reads the value of a variable directly, not via
1898   // GlobalNumericVariableTable. Therefore, we clear local variables by
1899   // clearing their value which will lead to a numeric substitution failure. We
1900   // also mark the variable for removal from GlobalNumericVariableTable since
1901   // this is what defineCmdlineVariables checks to decide that no global
1902   // variable has been defined.
1903   for (const auto &Var : GlobalNumericVariableTable)
1904     if (Var.first()[0] != '$') {
1905       Var.getValue()->clearValue();
1906       LocalNumericVars.push_back(Var.first());
1907     }
1908 
1909   for (const auto &Var : LocalPatternVars)
1910     GlobalVariableTable.erase(Var);
1911   for (const auto &Var : LocalNumericVars)
1912     GlobalNumericVariableTable.erase(Var);
1913 }
1914 
checkInput(SourceMgr & SM,StringRef Buffer,std::vector<FileCheckDiag> * Diags)1915 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer,
1916                            std::vector<FileCheckDiag> *Diags) {
1917   bool ChecksFailed = false;
1918 
1919   unsigned i = 0, j = 0, e = CheckStrings->size();
1920   while (true) {
1921     StringRef CheckRegion;
1922     if (j == e) {
1923       CheckRegion = Buffer;
1924     } else {
1925       const FileCheckString &CheckLabelStr = (*CheckStrings)[j];
1926       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1927         ++j;
1928         continue;
1929       }
1930 
1931       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1932       size_t MatchLabelLen = 0;
1933       size_t MatchLabelPos =
1934           CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags);
1935       if (MatchLabelPos == StringRef::npos)
1936         // Immediately bail if CHECK-LABEL fails, nothing else we can do.
1937         return false;
1938 
1939       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1940       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1941       ++j;
1942     }
1943 
1944     // Do not clear the first region as it's the one before the first
1945     // CHECK-LABEL and it would clear variables defined on the command-line
1946     // before they get used.
1947     if (i != 0 && Req.EnableVarScope)
1948       PatternContext->clearLocalVars();
1949 
1950     for (; i != j; ++i) {
1951       const FileCheckString &CheckStr = (*CheckStrings)[i];
1952 
1953       // Check each string within the scanned region, including a second check
1954       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1955       size_t MatchLen = 0;
1956       size_t MatchPos =
1957           CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags);
1958 
1959       if (MatchPos == StringRef::npos) {
1960         ChecksFailed = true;
1961         i = j;
1962         break;
1963       }
1964 
1965       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1966     }
1967 
1968     if (j == e)
1969       break;
1970   }
1971 
1972   // Success if no checks failed.
1973   return !ChecksFailed;
1974 }
1975