1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // FileCheck does a line-by-line check of a file that validates whether it
10 // contains the expected content. This is useful for regression tests etc.
11 //
12 // This file implements most of the API that will be used by the FileCheck utility
13 // as well as various unittests.
14 //===----------------------------------------------------------------------===//
15
16 #include "llvm/Support/FileCheck.h"
17 #include "FileCheckImpl.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/Support/FormatVariadic.h"
21 #include <cstdint>
22 #include <list>
23 #include <tuple>
24 #include <utility>
25
26 using namespace llvm;
27
eval() const28 Expected<uint64_t> NumericVariableUse::eval() const {
29 Optional<uint64_t> Value = Variable->getValue();
30 if (Value)
31 return *Value;
32
33 return make_error<UndefVarError>(Name);
34 }
35
eval() const36 Expected<uint64_t> BinaryOperation::eval() const {
37 Expected<uint64_t> LeftOp = LeftOperand->eval();
38 Expected<uint64_t> RightOp = RightOperand->eval();
39
40 // Bubble up any error (e.g. undefined variables) in the recursive
41 // evaluation.
42 if (!LeftOp || !RightOp) {
43 Error Err = Error::success();
44 if (!LeftOp)
45 Err = joinErrors(std::move(Err), LeftOp.takeError());
46 if (!RightOp)
47 Err = joinErrors(std::move(Err), RightOp.takeError());
48 return std::move(Err);
49 }
50
51 return EvalBinop(*LeftOp, *RightOp);
52 }
53
getResult() const54 Expected<std::string> NumericSubstitution::getResult() const {
55 Expected<uint64_t> EvaluatedValue = ExpressionASTPointer->eval();
56 if (!EvaluatedValue)
57 return EvaluatedValue.takeError();
58 return utostr(*EvaluatedValue);
59 }
60
getResult() const61 Expected<std::string> StringSubstitution::getResult() const {
62 // Look up the value and escape it so that we can put it into the regex.
63 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
64 if (!VarVal)
65 return VarVal.takeError();
66 return Regex::escape(*VarVal);
67 }
68
isValidVarNameStart(char C)69 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isalpha(C); }
70
71 Expected<Pattern::VariableProperties>
parseVariable(StringRef & Str,const SourceMgr & SM)72 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
73 if (Str.empty())
74 return ErrorDiagnostic::get(SM, Str, "empty variable name");
75
76 bool ParsedOneChar = false;
77 unsigned I = 0;
78 bool IsPseudo = Str[0] == '@';
79
80 // Global vars start with '$'.
81 if (Str[0] == '$' || IsPseudo)
82 ++I;
83
84 for (unsigned E = Str.size(); I != E; ++I) {
85 if (!ParsedOneChar && !isValidVarNameStart(Str[I]))
86 return ErrorDiagnostic::get(SM, Str, "invalid variable name");
87
88 // Variable names are composed of alphanumeric characters and underscores.
89 if (Str[I] != '_' && !isalnum(Str[I]))
90 break;
91 ParsedOneChar = true;
92 }
93
94 StringRef Name = Str.take_front(I);
95 Str = Str.substr(I);
96 return VariableProperties {Name, IsPseudo};
97 }
98
99 // StringRef holding all characters considered as horizontal whitespaces by
100 // FileCheck input canonicalization.
101 constexpr StringLiteral SpaceChars = " \t";
102
103 // Parsing helper function that strips the first character in S and returns it.
popFront(StringRef & S)104 static char popFront(StringRef &S) {
105 char C = S.front();
106 S = S.drop_front();
107 return C;
108 }
109
110 char UndefVarError::ID = 0;
111 char ErrorDiagnostic::ID = 0;
112 char NotFoundError::ID = 0;
113
parseNumericVariableDefinition(StringRef & Expr,FileCheckPatternContext * Context,Optional<size_t> LineNumber,const SourceMgr & SM)114 Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
115 StringRef &Expr, FileCheckPatternContext *Context,
116 Optional<size_t> LineNumber, const SourceMgr &SM) {
117 Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
118 if (!ParseVarResult)
119 return ParseVarResult.takeError();
120 StringRef Name = ParseVarResult->Name;
121
122 if (ParseVarResult->IsPseudo)
123 return ErrorDiagnostic::get(
124 SM, Name, "definition of pseudo numeric variable unsupported");
125
126 // Detect collisions between string and numeric variables when the latter
127 // is created later than the former.
128 if (Context->DefinedVariableTable.find(Name) !=
129 Context->DefinedVariableTable.end())
130 return ErrorDiagnostic::get(
131 SM, Name, "string variable with name '" + Name + "' already exists");
132
133 Expr = Expr.ltrim(SpaceChars);
134 if (!Expr.empty())
135 return ErrorDiagnostic::get(
136 SM, Expr, "unexpected characters after numeric variable name");
137
138 NumericVariable *DefinedNumericVariable;
139 auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
140 if (VarTableIter != Context->GlobalNumericVariableTable.end())
141 DefinedNumericVariable = VarTableIter->second;
142 else
143 DefinedNumericVariable = Context->makeNumericVariable(Name, LineNumber);
144
145 return DefinedNumericVariable;
146 }
147
parseNumericVariableUse(StringRef Name,bool IsPseudo,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)148 Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
149 StringRef Name, bool IsPseudo, Optional<size_t> LineNumber,
150 FileCheckPatternContext *Context, const SourceMgr &SM) {
151 if (IsPseudo && !Name.equals("@LINE"))
152 return ErrorDiagnostic::get(
153 SM, Name, "invalid pseudo numeric variable '" + Name + "'");
154
155 // Numeric variable definitions and uses are parsed in the order in which
156 // they appear in the CHECK patterns. For each definition, the pointer to the
157 // class instance of the corresponding numeric variable definition is stored
158 // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
159 // we get below is null, it means no such variable was defined before. When
160 // that happens, we create a dummy variable so that parsing can continue. All
161 // uses of undefined variables, whether string or numeric, are then diagnosed
162 // in printSubstitutions() after failing to match.
163 auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
164 NumericVariable *NumericVariable;
165 if (VarTableIter != Context->GlobalNumericVariableTable.end())
166 NumericVariable = VarTableIter->second;
167 else {
168 NumericVariable = Context->makeNumericVariable(Name);
169 Context->GlobalNumericVariableTable[Name] = NumericVariable;
170 }
171
172 Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
173 if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
174 return ErrorDiagnostic::get(
175 SM, Name,
176 "numeric variable '" + Name +
177 "' defined earlier in the same CHECK directive");
178
179 return std::make_unique<NumericVariableUse>(Name, NumericVariable);
180 }
181
parseNumericOperand(StringRef & Expr,AllowedOperand AO,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)182 Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
183 StringRef &Expr, AllowedOperand AO, Optional<size_t> LineNumber,
184 FileCheckPatternContext *Context, const SourceMgr &SM) {
185 if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
186 // Try to parse as a numeric variable use.
187 Expected<Pattern::VariableProperties> ParseVarResult =
188 parseVariable(Expr, SM);
189 if (ParseVarResult)
190 return parseNumericVariableUse(ParseVarResult->Name,
191 ParseVarResult->IsPseudo, LineNumber,
192 Context, SM);
193 if (AO == AllowedOperand::LineVar)
194 return ParseVarResult.takeError();
195 // Ignore the error and retry parsing as a literal.
196 consumeError(ParseVarResult.takeError());
197 }
198
199 // Otherwise, parse it as a literal.
200 uint64_t LiteralValue;
201 if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue))
202 return std::make_unique<ExpressionLiteral>(LiteralValue);
203
204 return ErrorDiagnostic::get(SM, Expr,
205 "invalid operand format '" + Expr + "'");
206 }
207
// Returns LeftOp + RightOp. The arithmetic is unsigned 64-bit, so the result
// wraps around on overflow.
static uint64_t add(uint64_t LeftOp, uint64_t RightOp) {
  uint64_t Sum = LeftOp;
  Sum += RightOp;
  return Sum;
}
211
// Returns LeftOp - RightOp. The arithmetic is unsigned 64-bit, so the result
// wraps around when RightOp is greater than LeftOp.
static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) {
  uint64_t Difference = LeftOp;
  Difference -= RightOp;
  return Difference;
}
215
216 Expected<std::unique_ptr<ExpressionAST>>
parseBinop(StringRef & Expr,std::unique_ptr<ExpressionAST> LeftOp,bool IsLegacyLineExpr,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)217 Pattern::parseBinop(StringRef &Expr, std::unique_ptr<ExpressionAST> LeftOp,
218 bool IsLegacyLineExpr, Optional<size_t> LineNumber,
219 FileCheckPatternContext *Context, const SourceMgr &SM) {
220 Expr = Expr.ltrim(SpaceChars);
221 if (Expr.empty())
222 return std::move(LeftOp);
223
224 // Check if this is a supported operation and select a function to perform
225 // it.
226 SMLoc OpLoc = SMLoc::getFromPointer(Expr.data());
227 char Operator = popFront(Expr);
228 binop_eval_t EvalBinop;
229 switch (Operator) {
230 case '+':
231 EvalBinop = add;
232 break;
233 case '-':
234 EvalBinop = sub;
235 break;
236 default:
237 return ErrorDiagnostic::get(
238 SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
239 }
240
241 // Parse right operand.
242 Expr = Expr.ltrim(SpaceChars);
243 if (Expr.empty())
244 return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");
245 // The second operand in a legacy @LINE expression is always a literal.
246 AllowedOperand AO =
247 IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any;
248 Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
249 parseNumericOperand(Expr, AO, LineNumber, Context, SM);
250 if (!RightOpResult)
251 return RightOpResult;
252
253 Expr = Expr.ltrim(SpaceChars);
254 return std::make_unique<BinaryOperation>(EvalBinop, std::move(LeftOp),
255 std::move(*RightOpResult));
256 }
257
parseNumericSubstitutionBlock(StringRef Expr,Optional<NumericVariable * > & DefinedNumericVariable,bool IsLegacyLineExpr,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)258 Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericSubstitutionBlock(
259 StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable,
260 bool IsLegacyLineExpr, Optional<size_t> LineNumber,
261 FileCheckPatternContext *Context, const SourceMgr &SM) {
262 std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
263 StringRef DefExpr = StringRef();
264 DefinedNumericVariable = None;
265 // Save variable definition expression if any.
266 size_t DefEnd = Expr.find(':');
267 if (DefEnd != StringRef::npos) {
268 DefExpr = Expr.substr(0, DefEnd);
269 Expr = Expr.substr(DefEnd + 1);
270 }
271
272 // Parse the expression itself.
273 Expr = Expr.ltrim(SpaceChars);
274 if (!Expr.empty()) {
275 // The first operand in a legacy @LINE expression is always the @LINE
276 // pseudo variable.
277 AllowedOperand AO =
278 IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
279 Expected<std::unique_ptr<ExpressionAST>> ParseResult =
280 parseNumericOperand(Expr, AO, LineNumber, Context, SM);
281 while (ParseResult && !Expr.empty()) {
282 ParseResult = parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr,
283 LineNumber, Context, SM);
284 // Legacy @LINE expressions only allow 2 operands.
285 if (ParseResult && IsLegacyLineExpr && !Expr.empty())
286 return ErrorDiagnostic::get(
287 SM, Expr,
288 "unexpected characters at end of expression '" + Expr + "'");
289 }
290 if (!ParseResult)
291 return ParseResult;
292 ExpressionASTPointer = std::move(*ParseResult);
293 }
294
295 // Parse the numeric variable definition.
296 if (DefEnd != StringRef::npos) {
297 DefExpr = DefExpr.ltrim(SpaceChars);
298 Expected<NumericVariable *> ParseResult =
299 parseNumericVariableDefinition(DefExpr, Context, LineNumber, SM);
300
301 if (!ParseResult)
302 return ParseResult.takeError();
303 DefinedNumericVariable = *ParseResult;
304 }
305
306 return std::move(ExpressionASTPointer);
307 }
308
// Parses the text of a check directive, PatternStr, into this pattern.
//
// Prefix is the check prefix, used only in diagnostics. Req supplies the
// MatchFullLines, NoCanonicalizeWhiteSpace and IgnoreCase options. On success
// the pattern is stored either as a fixed string in FixedStr or as a regular
// expression in RegExStr, together with the variable definitions
// (VariableDefs, NumericVariableDefs) and the Substitutions to apply at match
// time.
//
// Returns true after reporting a diagnostic if the pattern is malformed, and
// false on success.
parsePattern(StringRef PatternStr,StringRef Prefix,SourceMgr & SM,const FileCheckRequest & Req)309 bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
310 SourceMgr &SM, const FileCheckRequest &Req) {
// Full-line matching is never applied to CHECK-NOT patterns.
311 bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
312 IgnoreCase = Req.IgnoreCase;
313
314 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
315
// Trailing spaces and tabs are kept only when both NoCanonicalizeWhiteSpace
// and MatchFullLines are requested.
316 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
317 // Ignore trailing whitespace.
318 while (!PatternStr.empty() &&
319 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
320 PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
321
322 // Check that there is something on the line.
323 if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
324 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
325 "found empty check string with prefix '" + Prefix + ":'");
326 return true;
327 }
328
// Conversely, an empty check must not be given any pattern text.
329 if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
330 SM.PrintMessage(
331 PatternLoc, SourceMgr::DK_Error,
332 "found non-empty check string for empty check with prefix '" + Prefix +
333 ":'");
334 return true;
335 }
336
// An empty check always uses the same fixed regex; nothing else to parse.
337 if (CheckTy == Check::CheckEmpty) {
338 RegExStr = "(\n$)";
339 return false;
340 }
341
342 // Check to see if this is a fixed string, or if it has regex pieces.
343 if (!MatchFullLinesHere &&
344 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
345 PatternStr.find("[[") == StringRef::npos))) {
346 FixedStr = PatternStr;
347 return false;
348 }
349
// When matching full lines, anchor the regex at the start of the line and,
// unless whitespace canonicalization is disabled, allow leading spaces.
350 if (MatchFullLinesHere) {
351 RegExStr += '^';
352 if (!Req.NoCanonicalizeWhiteSpace)
353 RegExStr += " *";
354 }
355
356 // Paren value #0 is for the fully matched string. Any new parenthesized
357 // values add from there.
358 unsigned CurParen = 1;
359
360 // Otherwise, there is at least one regex piece. Build up the regex pattern
361 // by escaping scary characters in fixed strings, building up one big regex.
362 while (!PatternStr.empty()) {
363 // RegEx matches.
364 if (PatternStr.startswith("{{")) {
365 // This is the start of a regex match. Scan for the }}.
366 size_t End = PatternStr.find("}}");
367 if (End == StringRef::npos) {
368 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
369 SourceMgr::DK_Error,
370 "found start of regex string with no end '}}'");
371 return true;
372 }
373
374 // Enclose {{}} patterns in parens just like [[]] even though we're not
375 // capturing the result for any purpose. This is required in case the
376 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
377 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
378 RegExStr += '(';
379 ++CurParen;
380
381 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
382 return true;
383 RegExStr += ')';
384
385 PatternStr = PatternStr.substr(End + 2);
386 continue;
387 }
388
389 // String and numeric substitution blocks. Pattern substitution blocks come
390 // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
391 // other regex) and assigns it to the string variable 'foo'. The latter
392 // substitutes foo's value. Numeric substitution blocks recognize the same
393 // form as string ones, but start with a '#' sign after the double
394 // brackets. They also accept a combined form which sets a numeric variable
395 // to the evaluation of an expression. Both string and numeric variable
396 // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
397 // valid, as this helps catch some common errors.
398 if (PatternStr.startswith("[[")) {
399 StringRef UnparsedPatternStr = PatternStr.substr(2);
400 // Find the closing bracket pair ending the match. End is going to be an
401 // offset relative to the beginning of the match string.
402 size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
403 StringRef MatchStr = UnparsedPatternStr.substr(0, End);
// A leading '#' marks a numeric substitution block; it is stripped from
// MatchStr here, before End is validated below.
404 bool IsNumBlock = MatchStr.consume_front("#");
405
406 if (End == StringRef::npos) {
407 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
408 SourceMgr::DK_Error,
409 "Invalid substitution block, no ]] found");
410 return true;
411 }
412 // Strip the substitution block we are parsing. End points to the start
413 // of the "]]" closing the expression so account for it in computing the
414 // index of the first unparsed character.
415 PatternStr = UnparsedPatternStr.substr(End + 2);
416
417 bool IsDefinition = false;
418 bool SubstNeeded = false;
419 // Whether the substitution block is a legacy use of @LINE with string
420 // substitution block syntax.
421 bool IsLegacyLineExpr = false;
422 StringRef DefName;
423 StringRef SubstStr;
424 StringRef MatchRegexp;
// Offset in RegExStr at which the value of a substitution for this block is
// to be inserted at match time.
425 size_t SubstInsertIdx = RegExStr.size();
426
427 // Parse string variable or legacy @LINE expression.
428 if (!IsNumBlock) {
// No space or tab is allowed before the ':' (or anywhere in the block when
// there is no ':').
429 size_t VarEndIdx = MatchStr.find(":");
430 size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
431 if (SpacePos != StringRef::npos) {
432 SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
433 SourceMgr::DK_Error, "unexpected whitespace");
434 return true;
435 }
436
437 // Get the name (e.g. "foo") and verify it is well formed.
438 StringRef OrigMatchStr = MatchStr;
439 Expected<Pattern::VariableProperties> ParseVarResult =
440 parseVariable(MatchStr, SM);
441 if (!ParseVarResult) {
442 logAllUnhandledErrors(ParseVarResult.takeError(), errs());
443 return true;
444 }
445 StringRef Name = ParseVarResult->Name;
446 bool IsPseudo = ParseVarResult->IsPseudo;
447
// The presence of a ':' makes the block a definition; otherwise it is a use
// that requires a substitution.
448 IsDefinition = (VarEndIdx != StringRef::npos);
449 SubstNeeded = !IsDefinition;
450 if (IsDefinition) {
// The ':' must directly follow the name, and pseudo variables cannot be
// defined.
451 if ((IsPseudo || !MatchStr.consume_front(":"))) {
452 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
453 SourceMgr::DK_Error,
454 "invalid name in string variable definition");
455 return true;
456 }
457
458 // Detect collisions between string and numeric variables when the
459 // former is created later than the latter.
460 if (Context->GlobalNumericVariableTable.find(Name) !=
461 Context->GlobalNumericVariableTable.end()) {
462 SM.PrintMessage(
463 SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
464 "numeric variable with name '" + Name + "' already exists");
465 return true;
466 }
467 DefName = Name;
468 MatchRegexp = MatchStr;
469 } else {
// A use of a pseudo variable in string block syntax is handed over to the
// numeric block parser below, starting again from the unparsed text.
470 if (IsPseudo) {
471 MatchStr = OrigMatchStr;
472 IsLegacyLineExpr = IsNumBlock = true;
473 } else
474 SubstStr = Name;
475 }
476 }
477
478 // Parse numeric substitution block.
479 std::unique_ptr<ExpressionAST> ExpressionASTPointer;
480 Optional<NumericVariable *> DefinedNumericVariable;
481 if (IsNumBlock) {
482 Expected<std::unique_ptr<ExpressionAST>> ParseResult =
483 parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
484 IsLegacyLineExpr, LineNumber, Context,
485 SM);
486 if (!ParseResult) {
487 logAllUnhandledErrors(ParseResult.takeError(), errs());
488 return true;
489 }
490 ExpressionASTPointer = std::move(*ParseResult);
// A substitution is needed only when the block contains an expression.
491 SubstNeeded = ExpressionASTPointer != nullptr;
492 if (DefinedNumericVariable) {
493 IsDefinition = true;
494 DefName = (*DefinedNumericVariable)->getName();
495 }
// Without an expression, the block matches a sequence of decimal digits.
496 if (SubstNeeded)
497 SubstStr = MatchStr;
498 else
499 MatchRegexp = "[0-9]+";
500 }
501
502 // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]].
503 if (IsDefinition) {
// Open the capture group for the definition and move the substitution
// insertion point past the '(' just added.
504 RegExStr += '(';
505 ++SubstInsertIdx;
506
507 if (IsNumBlock) {
508 NumericVariableMatch NumericVariableDefinition = {
509 *DefinedNumericVariable, CurParen};
510 NumericVariableDefs[DefName] = NumericVariableDefinition;
511 // This store is done here rather than in match() to allow
512 // parseNumericVariableUse() to get the pointer to the class instance
513 // of the right variable definition corresponding to a given numeric
514 // variable use.
515 Context->GlobalNumericVariableTable[DefName] =
516 *DefinedNumericVariable;
517 } else {
518 VariableDefs[DefName] = CurParen;
519 // Mark string variable as defined to detect collisions between
520 // string and numeric variables in parseNumericVariableDefinition() and
521 // defineCmdlineVariables() when the latter is created later than the
522 // former. We cannot reuse GlobalVariableTable for this by populating
523 // it with an empty string since we would then lose the ability to
524 // detect the use of an undefined variable in match().
525 Context->DefinedVariableTable[DefName] = true;
526 }
527
528 ++CurParen;
529 }
530
// Append the regex the block matches, if any. AddRegExToRegEx() also adds
// the capture groups of that regex to CurParen.
531 if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM))
532 return true;
533
534 if (IsDefinition)
535 RegExStr += ')';
536
537 // Handle substitutions: [[foo]] and [[#<foo expr>]].
538 if (SubstNeeded) {
539 // Handle substitution of string variables that were defined earlier on
540 // the same line by emitting a backreference. Expressions do not
541 // support substituting a numeric variable defined on the same line.
542 if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) {
543 unsigned CaptureParenGroup = VariableDefs[SubstStr];
// Backreferences are emitted as a single digit, hence the 1-9 limit.
544 if (CaptureParenGroup < 1 || CaptureParenGroup > 9) {
545 SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()),
546 SourceMgr::DK_Error,
547 "Can't back-reference more than 9 variables");
548 return true;
549 }
550 AddBackrefToRegEx(CaptureParenGroup);
551 } else {
552 // Handle substitution of string variables ([[<var>]]) defined in
553 // previous CHECK patterns, and substitution of expressions.
554 Substitution *Substitution =
555 IsNumBlock
556 ? Context->makeNumericSubstitution(
557 SubstStr, std::move(ExpressionASTPointer),
558 SubstInsertIdx)
559 : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
560 Substitutions.push_back(Substitution);
561 }
562 }
563 }
564
565 // Handle fixed string matches.
566 // Find the end, which is the start of the next regex.
567 size_t FixedMatchEnd = PatternStr.find("{{");
568 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
569 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
570 PatternStr = PatternStr.substr(FixedMatchEnd);
571 }
572
// When matching full lines, allow trailing spaces unless whitespace
// canonicalization is disabled, and anchor the regex at the end of the line.
573 if (MatchFullLinesHere) {
574 if (!Req.NoCanonicalizeWhiteSpace)
575 RegExStr += " *";
576 RegExStr += '$';
577 }
578
579 return false;
580 }
581
AddRegExToRegEx(StringRef RS,unsigned & CurParen,SourceMgr & SM)582 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
583 Regex R(RS);
584 std::string Error;
585 if (!R.isValid(Error)) {
586 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
587 "invalid regex: " + Error);
588 return true;
589 }
590
591 RegExStr += RS.str();
592 CurParen += R.getNumMatches();
593 return false;
594 }
595
AddBackrefToRegEx(unsigned BackrefNum)596 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
597 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
598 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
599 RegExStr += Backref;
600 }
601
match(StringRef Buffer,size_t & MatchLen,const SourceMgr & SM) const602 Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen,
603 const SourceMgr &SM) const {
604 // If this is the EOF pattern, match it immediately.
605 if (CheckTy == Check::CheckEOF) {
606 MatchLen = 0;
607 return Buffer.size();
608 }
609
610 // If this is a fixed string pattern, just match it now.
611 if (!FixedStr.empty()) {
612 MatchLen = FixedStr.size();
613 size_t Pos =
614 IgnoreCase ? Buffer.find_lower(FixedStr) : Buffer.find(FixedStr);
615 if (Pos == StringRef::npos)
616 return make_error<NotFoundError>();
617 return Pos;
618 }
619
620 // Regex match.
621
622 // If there are substitutions, we need to create a temporary string with the
623 // actual value.
624 StringRef RegExToMatch = RegExStr;
625 std::string TmpStr;
626 if (!Substitutions.empty()) {
627 TmpStr = RegExStr;
628 if (LineNumber)
629 Context->LineVariable->setValue(*LineNumber);
630
631 size_t InsertOffset = 0;
632 // Substitute all string variables and expressions whose values are only
633 // now known. Use of string variables defined on the same line are handled
634 // by back-references.
635 for (const auto &Substitution : Substitutions) {
636 // Substitute and check for failure (e.g. use of undefined variable).
637 Expected<std::string> Value = Substitution->getResult();
638 if (!Value)
639 return Value.takeError();
640
641 // Plop it into the regex at the adjusted offset.
642 TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset,
643 Value->begin(), Value->end());
644 InsertOffset += Value->size();
645 }
646
647 // Match the newly constructed regex.
648 RegExToMatch = TmpStr;
649 }
650
651 SmallVector<StringRef, 4> MatchInfo;
652 unsigned int Flags = Regex::Newline;
653 if (IgnoreCase)
654 Flags |= Regex::IgnoreCase;
655 if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo))
656 return make_error<NotFoundError>();
657
658 // Successful regex match.
659 assert(!MatchInfo.empty() && "Didn't get any match");
660 StringRef FullMatch = MatchInfo[0];
661
662 // If this defines any string variables, remember their values.
663 for (const auto &VariableDef : VariableDefs) {
664 assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
665 Context->GlobalVariableTable[VariableDef.first] =
666 MatchInfo[VariableDef.second];
667 }
668
669 // If this defines any numeric variables, remember their values.
670 for (const auto &NumericVariableDef : NumericVariableDefs) {
671 const NumericVariableMatch &NumericVariableMatch =
672 NumericVariableDef.getValue();
673 unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup;
674 assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error");
675 NumericVariable *DefinedNumericVariable =
676 NumericVariableMatch.DefinedNumericVariable;
677
678 StringRef MatchedValue = MatchInfo[CaptureParenGroup];
679 uint64_t Val;
680 if (MatchedValue.getAsInteger(10, Val))
681 return ErrorDiagnostic::get(SM, MatchedValue,
682 "Unable to represent numeric value");
683 DefinedNumericVariable->setValue(Val);
684 }
685
686 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
687 // the required preceding newline, which is consumed by the pattern in the
688 // case of CHECK-EMPTY but not CHECK-NEXT.
689 size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
690 MatchLen = FullMatch.size() - MatchStartSkip;
691 return FullMatch.data() - Buffer.data() + MatchStartSkip;
692 }
693
computeMatchDistance(StringRef Buffer) const694 unsigned Pattern::computeMatchDistance(StringRef Buffer) const {
695 // Just compute the number of matching characters. For regular expressions, we
696 // just compare against the regex itself and hope for the best.
697 //
698 // FIXME: One easy improvement here is have the regex lib generate a single
699 // example regular expression which matches, and use that as the example
700 // string.
701 StringRef ExampleString(FixedStr);
702 if (ExampleString.empty())
703 ExampleString = RegExStr;
704
705 // Only compare up to the first line in the buffer, or the string size.
706 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
707 BufferPrefix = BufferPrefix.split('\n').first;
708 return BufferPrefix.edit_distance(ExampleString);
709 }
710
printSubstitutions(const SourceMgr & SM,StringRef Buffer,SMRange MatchRange) const711 void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
712 SMRange MatchRange) const {
713 // Print what we know about substitutions.
714 if (!Substitutions.empty()) {
715 for (const auto &Substitution : Substitutions) {
716 SmallString<256> Msg;
717 raw_svector_ostream OS(Msg);
718 Expected<std::string> MatchedValue = Substitution->getResult();
719
720 // Substitution failed or is not known at match time, print the undefined
721 // variables it uses.
722 if (!MatchedValue) {
723 bool UndefSeen = false;
724 handleAllErrors(MatchedValue.takeError(), [](const NotFoundError &E) {},
725 // Handled in PrintNoMatch().
726 [](const ErrorDiagnostic &E) {},
727 [&](const UndefVarError &E) {
728 if (!UndefSeen) {
729 OS << "uses undefined variable(s):";
730 UndefSeen = true;
731 }
732 OS << " ";
733 E.log(OS);
734 });
735 } else {
736 // Substitution succeeded. Print substituted value.
737 OS << "with \"";
738 OS.write_escaped(Substitution->getFromString()) << "\" equal to \"";
739 OS.write_escaped(*MatchedValue) << "\"";
740 }
741
742 if (MatchRange.isValid())
743 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
744 {MatchRange});
745 else
746 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
747 SourceMgr::DK_Note, OS.str());
748 }
749 }
750 }
751
ProcessMatchResult(FileCheckDiag::MatchType MatchTy,const SourceMgr & SM,SMLoc Loc,Check::FileCheckType CheckTy,StringRef Buffer,size_t Pos,size_t Len,std::vector<FileCheckDiag> * Diags,bool AdjustPrevDiag=false)752 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
753 const SourceMgr &SM, SMLoc Loc,
754 Check::FileCheckType CheckTy,
755 StringRef Buffer, size_t Pos, size_t Len,
756 std::vector<FileCheckDiag> *Diags,
757 bool AdjustPrevDiag = false) {
758 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
759 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
760 SMRange Range(Start, End);
761 if (Diags) {
762 if (AdjustPrevDiag)
763 Diags->rbegin()->MatchTy = MatchTy;
764 else
765 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
766 }
767 return Range;
768 }
769
printFuzzyMatch(const SourceMgr & SM,StringRef Buffer,std::vector<FileCheckDiag> * Diags) const770 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
771 std::vector<FileCheckDiag> *Diags) const {
772 // Attempt to find the closest/best fuzzy match. Usually an error happens
773 // because some string in the output didn't exactly match. In these cases, we
774 // would like to show the user a best guess at what "should have" matched, to
775 // save them having to actually check the input manually.
776 size_t NumLinesForward = 0;
777 size_t Best = StringRef::npos;
778 double BestQuality = 0;
779
780 // Use an arbitrary 4k limit on how far we will search.
781 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
782 if (Buffer[i] == '\n')
783 ++NumLinesForward;
784
785 // Patterns have leading whitespace stripped, so skip whitespace when
786 // looking for something which looks like a pattern.
787 if (Buffer[i] == ' ' || Buffer[i] == '\t')
788 continue;
789
790 // Compute the "quality" of this match as an arbitrary combination of the
791 // match distance and the number of lines skipped to get to this match.
792 unsigned Distance = computeMatchDistance(Buffer.substr(i));
793 double Quality = Distance + (NumLinesForward / 100.);
794
795 if (Quality < BestQuality || Best == StringRef::npos) {
796 Best = i;
797 BestQuality = Quality;
798 }
799 }
800
801 // Print the "possible intended match here" line if we found something
802 // reasonable and not equal to what we showed in the "scanning from here"
803 // line.
804 if (Best && Best != StringRef::npos && BestQuality < 50) {
805 SMRange MatchRange =
806 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(),
807 getCheckTy(), Buffer, Best, 0, Diags);
808 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note,
809 "possible intended match here");
810
811 // FIXME: If we wanted to be really friendly we would show why the match
812 // failed, as it can be hard to spot simple one character differences.
813 }
814 }
815
816 Expected<StringRef>
getPatternVarValue(StringRef VarName)817 FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
818 auto VarIter = GlobalVariableTable.find(VarName);
819 if (VarIter == GlobalVariableTable.end())
820 return make_error<UndefVarError>(VarName);
821
822 return VarIter->second;
823 }
824
825 template <class... Types>
makeNumericVariable(Types...args)826 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... args) {
827 NumericVariables.push_back(std::make_unique<NumericVariable>(args...));
828 return NumericVariables.back().get();
829 }
830
831 Substitution *
makeStringSubstitution(StringRef VarName,size_t InsertIdx)832 FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
833 size_t InsertIdx) {
834 Substitutions.push_back(
835 std::make_unique<StringSubstitution>(this, VarName, InsertIdx));
836 return Substitutions.back().get();
837 }
838
makeNumericSubstitution(StringRef ExpressionStr,std::unique_ptr<ExpressionAST> ExpressionASTPointer,size_t InsertIdx)839 Substitution *FileCheckPatternContext::makeNumericSubstitution(
840 StringRef ExpressionStr,
841 std::unique_ptr<ExpressionAST> ExpressionASTPointer, size_t InsertIdx) {
842 Substitutions.push_back(std::make_unique<NumericSubstitution>(
843 this, ExpressionStr, std::move(ExpressionASTPointer), InsertIdx));
844 return Substitutions.back().get();
845 }
846
FindRegexVarEnd(StringRef Str,SourceMgr & SM)847 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
848 // Offset keeps track of the current offset within the input Str
849 size_t Offset = 0;
850 // [...] Nesting depth
851 size_t BracketDepth = 0;
852
853 while (!Str.empty()) {
854 if (Str.startswith("]]") && BracketDepth == 0)
855 return Offset;
856 if (Str[0] == '\\') {
857 // Backslash escapes the next char within regexes, so skip them both.
858 Str = Str.substr(2);
859 Offset += 2;
860 } else {
861 switch (Str[0]) {
862 default:
863 break;
864 case '[':
865 BracketDepth++;
866 break;
867 case ']':
868 if (BracketDepth == 0) {
869 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
870 SourceMgr::DK_Error,
871 "missing closing \"]\" for regex variable");
872 exit(1);
873 }
874 BracketDepth--;
875 break;
876 }
877 Str = Str.substr(1);
878 Offset++;
879 }
880 }
881
882 return StringRef::npos;
883 }
884
CanonicalizeFile(MemoryBuffer & MB,SmallVectorImpl<char> & OutputBuffer)885 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB,
886 SmallVectorImpl<char> &OutputBuffer) {
887 OutputBuffer.reserve(MB.getBufferSize());
888
889 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
890 Ptr != End; ++Ptr) {
891 // Eliminate trailing dosish \r.
892 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
893 continue;
894 }
895
896 // If current char is not a horizontal whitespace or if horizontal
897 // whitespace canonicalization is disabled, dump it to output as is.
898 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
899 OutputBuffer.push_back(*Ptr);
900 continue;
901 }
902
903 // Otherwise, add one space and advance over neighboring space.
904 OutputBuffer.push_back(' ');
905 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
906 ++Ptr;
907 }
908
909 // Add a null byte and then return all but that byte.
910 OutputBuffer.push_back('\0');
911 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
912 }
913
FileCheckDiag(const SourceMgr & SM,const Check::FileCheckType & CheckTy,SMLoc CheckLoc,MatchType MatchTy,SMRange InputRange)914 FileCheckDiag::FileCheckDiag(const SourceMgr &SM,
915 const Check::FileCheckType &CheckTy,
916 SMLoc CheckLoc, MatchType MatchTy,
917 SMRange InputRange)
918 : CheckTy(CheckTy), MatchTy(MatchTy) {
919 auto Start = SM.getLineAndColumn(InputRange.Start);
920 auto End = SM.getLineAndColumn(InputRange.End);
921 InputStartLine = Start.first;
922 InputStartCol = Start.second;
923 InputEndLine = End.first;
924 InputEndCol = End.second;
925 Start = SM.getLineAndColumn(CheckLoc);
926 CheckLine = Start.first;
927 CheckCol = Start.second;
928 }
929
/// Returns true if \p c can be part of a check-prefix-like word, i.e. it is
/// alphanumeric, '-' or '_'.
static bool IsPartOfWord(char c) {
  // isalnum() has undefined behavior for negative arguments other than EOF,
  // and plain char may be signed, so go through unsigned char first. This
  // matters for non-ASCII bytes in the check file.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
933
setCount(int C)934 Check::FileCheckType &Check::FileCheckType::setCount(int C) {
935 assert(Count > 0 && "zero and negative counts are not supported");
936 assert((C == 1 || Kind == CheckPlain) &&
937 "count supported only for plain CHECK directives");
938 Count = C;
939 return *this;
940 }
941
getDescription(StringRef Prefix) const942 std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
943 switch (Kind) {
944 case Check::CheckNone:
945 return "invalid";
946 case Check::CheckPlain:
947 if (Count > 1)
948 return Prefix.str() + "-COUNT";
949 return Prefix;
950 case Check::CheckNext:
951 return Prefix.str() + "-NEXT";
952 case Check::CheckSame:
953 return Prefix.str() + "-SAME";
954 case Check::CheckNot:
955 return Prefix.str() + "-NOT";
956 case Check::CheckDAG:
957 return Prefix.str() + "-DAG";
958 case Check::CheckLabel:
959 return Prefix.str() + "-LABEL";
960 case Check::CheckEmpty:
961 return Prefix.str() + "-EMPTY";
962 case Check::CheckEOF:
963 return "implicit EOF";
964 case Check::CheckBadNot:
965 return "bad NOT";
966 case Check::CheckBadCount:
967 return "bad COUNT";
968 }
969 llvm_unreachable("unknown FileCheckType");
970 }
971
972 static std::pair<Check::FileCheckType, StringRef>
FindCheckType(StringRef Buffer,StringRef Prefix)973 FindCheckType(StringRef Buffer, StringRef Prefix) {
974 if (Buffer.size() <= Prefix.size())
975 return {Check::CheckNone, StringRef()};
976
977 char NextChar = Buffer[Prefix.size()];
978
979 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
980 // Verify that the : is present after the prefix.
981 if (NextChar == ':')
982 return {Check::CheckPlain, Rest};
983
984 if (NextChar != '-')
985 return {Check::CheckNone, StringRef()};
986
987 if (Rest.consume_front("COUNT-")) {
988 int64_t Count;
989 if (Rest.consumeInteger(10, Count))
990 // Error happened in parsing integer.
991 return {Check::CheckBadCount, Rest};
992 if (Count <= 0 || Count > INT32_MAX)
993 return {Check::CheckBadCount, Rest};
994 if (!Rest.consume_front(":"))
995 return {Check::CheckBadCount, Rest};
996 return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest};
997 }
998
999 if (Rest.consume_front("NEXT:"))
1000 return {Check::CheckNext, Rest};
1001
1002 if (Rest.consume_front("SAME:"))
1003 return {Check::CheckSame, Rest};
1004
1005 if (Rest.consume_front("NOT:"))
1006 return {Check::CheckNot, Rest};
1007
1008 if (Rest.consume_front("DAG:"))
1009 return {Check::CheckDAG, Rest};
1010
1011 if (Rest.consume_front("LABEL:"))
1012 return {Check::CheckLabel, Rest};
1013
1014 if (Rest.consume_front("EMPTY:"))
1015 return {Check::CheckEmpty, Rest};
1016
1017 // You can't combine -NOT with another suffix.
1018 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
1019 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
1020 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
1021 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
1022 return {Check::CheckBadNot, Rest};
1023
1024 return {Check::CheckNone, Rest};
1025 }
1026
1027 // From the given position, find the next character after the word.
SkipWord(StringRef Str,size_t Loc)1028 static size_t SkipWord(StringRef Str, size_t Loc) {
1029 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
1030 ++Loc;
1031 return Loc;
1032 }
1033
1034 /// Searches the buffer for the first prefix in the prefix regular expression.
1035 ///
1036 /// This searches the buffer using the provided regular expression, however it
1037 /// enforces constraints beyond that:
1038 /// 1) The found prefix must not be a suffix of something that looks like
1039 /// a valid prefix.
1040 /// 2) The found prefix must be followed by a valid check type suffix using \c
1041 /// FindCheckType above.
1042 ///
1043 /// \returns a pair of StringRefs into the Buffer, which combines:
1044 /// - the first match of the regular expression to satisfy these two is
1045 /// returned,
1046 /// otherwise an empty StringRef is returned to indicate failure.
1047 /// - buffer rewound to the location right after parsed suffix, for parsing
1048 /// to continue from
1049 ///
1050 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
1051 /// start at the beginning of the returned prefix, increment \p LineNumber for
1052 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
1053 /// check found by examining the suffix.
1054 ///
1055 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
1056 /// is unspecified.
1057 static std::pair<StringRef, StringRef>
FindFirstMatchingPrefix(Regex & PrefixRE,StringRef & Buffer,unsigned & LineNumber,Check::FileCheckType & CheckTy)1058 FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
1059 unsigned &LineNumber, Check::FileCheckType &CheckTy) {
1060 SmallVector<StringRef, 2> Matches;
1061
1062 while (!Buffer.empty()) {
1063 // Find the first (longest) match using the RE.
1064 if (!PrefixRE.match(Buffer, &Matches))
1065 // No match at all, bail.
1066 return {StringRef(), StringRef()};
1067
1068 StringRef Prefix = Matches[0];
1069 Matches.clear();
1070
1071 assert(Prefix.data() >= Buffer.data() &&
1072 Prefix.data() < Buffer.data() + Buffer.size() &&
1073 "Prefix doesn't start inside of buffer!");
1074 size_t Loc = Prefix.data() - Buffer.data();
1075 StringRef Skipped = Buffer.substr(0, Loc);
1076 Buffer = Buffer.drop_front(Loc);
1077 LineNumber += Skipped.count('\n');
1078
1079 // Check that the matched prefix isn't a suffix of some other check-like
1080 // word.
1081 // FIXME: This is a very ad-hoc check. it would be better handled in some
1082 // other way. Among other things it seems hard to distinguish between
1083 // intentional and unintentional uses of this feature.
1084 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
1085 // Now extract the type.
1086 StringRef AfterSuffix;
1087 std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix);
1088
1089 // If we've found a valid check type for this prefix, we're done.
1090 if (CheckTy != Check::CheckNone)
1091 return {Prefix, AfterSuffix};
1092 }
1093
1094 // If we didn't successfully find a prefix, we need to skip this invalid
1095 // prefix and continue scanning. We directly skip the prefix that was
1096 // matched and any additional parts of that check-like word.
1097 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
1098 }
1099
1100 // We ran out of buffer while skipping partial matches so give up.
1101 return {StringRef(), StringRef()};
1102 }
1103
createLineVariable()1104 void FileCheckPatternContext::createLineVariable() {
1105 assert(!LineVariable && "@LINE pseudo numeric variable already created");
1106 StringRef LineName = "@LINE";
1107 LineVariable = makeNumericVariable(LineName);
1108 GlobalNumericVariableTable[LineName] = LineVariable;
1109 }
1110
/// Constructs a FileCheck instance configured by \p Req, with a freshly
/// allocated pattern context and an empty list of check strings.
FileCheck::FileCheck(FileCheckRequest Req)
    : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()),
      CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {}

// Defined out of line, in the same file as the constructor that allocates
// the std::unique_ptr members.
FileCheck::~FileCheck() = default;
1116
/// Parses the check file contents in \p Buffer and fills CheckStrings with
/// the directives found.
///
/// Command-line variable definitions (Req.GlobalDefines) are applied first and
/// the @LINE pseudo variable is created. Each pattern in Req.ImplicitCheckNot
/// is turned into an implicit CHECK-NOT pattern backed by a synthetic
/// "command line" source buffer registered with \p SM. Directives are then
/// located with \p PrefixRE; CHECK-DAG and CHECK-NOT patterns are accumulated
/// and attached to the next non-DAG/NOT directive (or to a trailing implicit
/// EOF pattern).
///
/// \returns true if an error was reported (including the case where no check
/// strings were found), false on success.
bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer,
                              Regex &PrefixRE) {
  Error DefineError =
      PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM);
  if (DefineError) {
    logAllUnhandledErrors(std::move(DefineError), errs());
    return true;
  }

  PatternContext->createLineVariable();

  std::vector<Pattern> ImplicitNegativeChecks;
  for (const auto &PatternString : Req.ImplicitCheckNot) {
    // Create a buffer with fake command line content in order to display the
    // command line option responsible for the specific implicit CHECK-NOT.
    std::string Prefix = "-implicit-check-not='";
    std::string Suffix = "'";
    std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
        Prefix + PatternString + Suffix, "command line");

    // Take the view of the pattern text before ownership of the buffer is
    // handed over to the source manager.
    StringRef PatternInBuffer =
        CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
    SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());

    // NOTE(review): the result of parsePattern() is not checked here.
    ImplicitNegativeChecks.push_back(
        Pattern(Check::CheckNot, PatternContext.get()));
    ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
                                               "IMPLICIT-CHECK", SM, Req);
  }

  // Pending CHECK-DAG/CHECK-NOT patterns waiting for the next directive they
  // can be attached to; always seeded with the implicit CHECK-NOTs.
  std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;

  // LineNumber keeps track of the line on which CheckPrefix instances are
  // found.
  unsigned LineNumber = 1;

  while (1) {
    Check::FileCheckType CheckTy;

    // See if a prefix occurs in the memory buffer.
    StringRef UsedPrefix;
    StringRef AfterSuffix;
    std::tie(UsedPrefix, AfterSuffix) =
        FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy);
    if (UsedPrefix.empty())
      break;
    assert(UsedPrefix.data() == Buffer.data() &&
           "Failed to move Buffer's start forward, or pointed prefix outside "
           "of the buffer!");
    assert(AfterSuffix.data() >= Buffer.data() &&
           AfterSuffix.data() < Buffer.data() + Buffer.size() &&
           "Parsing after suffix doesn't start inside of buffer!");

    // Location to use for error messages.
    const char *UsedPrefixStart = UsedPrefix.data();

    // Skip the buffer to the end of parsed suffix (or just prefix, if no good
    // suffix was processed).
    Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
                                 : AfterSuffix;

    // Complain about useful-looking but unsupported suffixes.
    if (CheckTy == Check::CheckBadNot) {
      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
                      "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
      return true;
    }

    // Complain about invalid count specification.
    if (CheckTy == Check::CheckBadCount) {
      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
                      "invalid count in -COUNT specification on prefix '" +
                          UsedPrefix + "'");
      return true;
    }

    // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
    // leading whitespace. Leading whitespace is kept only when both
    // NoCanonicalizeWhiteSpace and MatchFullLines are set.
    if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
      Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));

    // Scan ahead to the end of line.
    size_t EOL = Buffer.find_first_of("\n\r");

    // Remember the location of the start of the pattern, for diagnostics.
    SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());

    // Parse the pattern.
    Pattern P(CheckTy, PatternContext.get(), LineNumber);
    if (P.parsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, Req))
      return true;

    // Verify that CHECK-LABEL lines do not define or use variables
    if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
      SM.PrintMessage(
          SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
          "found '" + UsedPrefix + "-LABEL:'"
                                   " with variable definition or use");
      return true;
    }

    // Continue scanning after this directive's line.
    Buffer = Buffer.substr(EOL);

    // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line
    // before them.
    if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
         CheckTy == Check::CheckEmpty) &&
        CheckStrings->empty()) {
      StringRef Type = CheckTy == Check::CheckNext
                           ? "NEXT"
                           : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
      SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
                      SourceMgr::DK_Error,
                      "found '" + UsedPrefix + "-" + Type +
                          "' without previous '" + UsedPrefix + ": line");
      return true;
    }

    // Handle CHECK-DAG/-NOT: they are only queued here and get attached to
    // the next directive of another kind.
    if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
      DagNotMatches.push_back(P);
      continue;
    }

    // Okay, add the string we captured to the output vector and move on.
    // The queued DAG/NOT patterns move into the new check string, and the
    // queue is reset to just the implicit CHECK-NOTs.
    CheckStrings->emplace_back(P, UsedPrefix, PatternLoc);
    std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
    DagNotMatches = ImplicitNegativeChecks;
  }

  // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
  // prefix as a filler for the error message.
  if (!DagNotMatches.empty()) {
    CheckStrings->emplace_back(
        Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1),
        *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
    std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
  }

  // Finding no directive at all is an error; list the prefixes searched for.
  if (CheckStrings->empty()) {
    errs() << "error: no check strings found with prefix"
           << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
    auto I = Req.CheckPrefixes.begin();
    auto E = Req.CheckPrefixes.end();
    if (I != E) {
      errs() << "\'" << *I << ":'";
      ++I;
    }
    for (; I != E; ++I)
      errs() << ", \'" << *I << ":'";

    errs() << '\n';
    return true;
  }

  return false;
}
1273
PrintMatch(bool ExpectedMatch,const SourceMgr & SM,StringRef Prefix,SMLoc Loc,const Pattern & Pat,int MatchedCount,StringRef Buffer,size_t MatchPos,size_t MatchLen,const FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags)1274 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1275 StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1276 int MatchedCount, StringRef Buffer, size_t MatchPos,
1277 size_t MatchLen, const FileCheckRequest &Req,
1278 std::vector<FileCheckDiag> *Diags) {
1279 bool PrintDiag = true;
1280 if (ExpectedMatch) {
1281 if (!Req.Verbose)
1282 return;
1283 if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
1284 return;
1285 // Due to their verbosity, we don't print verbose diagnostics here if we're
1286 // gathering them for a different rendering, but we always print other
1287 // diagnostics.
1288 PrintDiag = !Diags;
1289 }
1290 SMRange MatchRange = ProcessMatchResult(
1291 ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
1292 : FileCheckDiag::MatchFoundButExcluded,
1293 SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
1294 if (!PrintDiag)
1295 return;
1296
1297 std::string Message = formatv("{0}: {1} string found in input",
1298 Pat.getCheckTy().getDescription(Prefix),
1299 (ExpectedMatch ? "expected" : "excluded"))
1300 .str();
1301 if (Pat.getCount() > 1)
1302 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
1303
1304 SM.PrintMessage(
1305 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
1306 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
1307 {MatchRange});
1308 Pat.printSubstitutions(SM, Buffer, MatchRange);
1309 }
1310
/// Convenience overload of PrintMatch that takes the prefix, location and
/// pattern from \p CheckStr and forwards all other arguments unchanged.
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
                       const FileCheckString &CheckStr, int MatchedCount,
                       StringRef Buffer, size_t MatchPos, size_t MatchLen,
                       FileCheckRequest &Req,
                       std::vector<FileCheckDiag> *Diags) {
  PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
             MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags);
}
1319
/// Reports that pattern \p Pat did not match \p Buffer.
///
/// \p ExpectedMatch tells whether a match was wanted (the failure is then an
/// error) or excluded (the absence of a match is then only a remark, emitted
/// when \p VerboseVerbose is set). \p MatchErrors holds the error(s) returned
/// by the failed match and must be a failure value; this function takes
/// ownership and consumes it on every path. \p MatchedCount is the index of
/// the attempted match among the Pat.getCount() requested ones.
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
                         StringRef Prefix, SMLoc Loc, const Pattern &Pat,
                         int MatchedCount, StringRef Buffer,
                         bool VerboseVerbose, std::vector<FileCheckDiag> *Diags,
                         Error MatchErrors) {
  assert(MatchErrors && "Called on successful match");
  bool PrintDiag = true;
  if (!ExpectedMatch) {
    // An excluded pattern that was not found is only interesting in the most
    // verbose mode; otherwise just discard the error.
    if (!VerboseVerbose) {
      consumeError(std::move(MatchErrors));
      return;
    }
    // Due to their verbosity, we don't print verbose diagnostics here if we're
    // gathering them for a different rendering, but we always print other
    // diagnostics.
    PrintDiag = !Diags;
  }

  // If the current position is at the end of a line, advance to the start of
  // the next line. (This skips all leading spaces, tabs and line breaks.)
  Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
  SMRange SearchRange = ProcessMatchResult(
      ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
                    : FileCheckDiag::MatchNoneAndExcluded,
      SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
  if (!PrintDiag) {
    consumeError(std::move(MatchErrors));
    return;
  }

  // Log every ErrorDiagnostic to stderr; any other kind of error is left in
  // MatchErrors.
  MatchErrors = handleErrors(std::move(MatchErrors),
                             [](const ErrorDiagnostic &E) { E.log(errs()); });

  // No problem matching the string per se: everything was an
  // ErrorDiagnostic that has just been logged, so there is nothing more to
  // say.
  if (!MatchErrors)
    return;
  consumeError(std::move(MatchErrors));

  // Print "not found" diagnostic.
  std::string Message = formatv("{0}: {1} string not found in input",
                                Pat.getCheckTy().getDescription(Prefix),
                                (ExpectedMatch ? "expected" : "excluded"))
                            .str();
  if (Pat.getCount() > 1)
    Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
  SM.PrintMessage(
      Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);

  // Print the "scanning from here" line.
  SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");

  // Allow the pattern to print additional information if desired.
  Pat.printSubstitutions(SM, Buffer);

  // A best-guess "possible intended match" only makes sense when a match was
  // wanted.
  if (ExpectedMatch)
    Pat.printFuzzyMatch(SM, Buffer, Diags);
}
1377
/// Convenience overload of PrintNoMatch that takes the prefix, location and
/// pattern from \p CheckStr and forwards all other arguments, handing over
/// ownership of \p MatchErrors.
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
                         const FileCheckString &CheckStr, int MatchedCount,
                         StringRef Buffer, bool VerboseVerbose,
                         std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
  PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
               MatchedCount, Buffer, VerboseVerbose, Diags,
               std::move(MatchErrors));
}
1386
1387 /// Counts the number of newlines in the specified range.
CountNumNewlinesBetween(StringRef Range,const char * & FirstNewLine)1388 static unsigned CountNumNewlinesBetween(StringRef Range,
1389 const char *&FirstNewLine) {
1390 unsigned NumNewLines = 0;
1391 while (1) {
1392 // Scan for newline.
1393 Range = Range.substr(Range.find_first_of("\n\r"));
1394 if (Range.empty())
1395 return NumNewLines;
1396
1397 ++NumNewLines;
1398
1399 // Handle \n\r and \r\n as a single newline.
1400 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1401 (Range[0] != Range[1]))
1402 Range = Range.substr(1);
1403 Range = Range.substr(1);
1404
1405 if (NumNewLines == 1)
1406 FirstNewLine = Range.begin();
1407 }
1408 }
1409
/// Matches this check string against \p Buffer.
///
/// Unless \p IsLabelScanMode is set, the CHECK-DAG patterns attached to this
/// check string are matched first, and afterwards the CHECK-NEXT/EMPTY,
/// CHECK-SAME and CHECK-NOT constraints are verified. The pattern itself is
/// matched Pat.getCount() times in a row.
///
/// \returns the offset in \p Buffer of the first match of the pattern and
/// sets \p MatchLen to the distance from there to the end of the last match,
/// or returns StringRef::npos if any step fails. Diagnostics are reported
/// through \p SM and recorded in \p Diags according to \p Req.
size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
                              bool IsLabelScanMode, size_t &MatchLen,
                              FileCheckRequest &Req,
                              std::vector<FileCheckDiag> *Diags) const {
  size_t LastPos = 0;
  std::vector<const Pattern *> NotStrings;

  // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
  // bounds; we have not processed variable definitions within the bounded block
  // yet so cannot handle any final CHECK-DAG yet; this is handled when going
  // over the block again (including the last CHECK-LABEL) in normal mode.
  if (!IsLabelScanMode) {
    // Match "dag strings" (with mixed "not strings" if any).
    LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags);
    if (LastPos == StringRef::npos)
      return StringRef::npos;
  }

  // Match itself from the last position after matching CHECK-DAG.
  size_t LastMatchEnd = LastPos;
  size_t FirstMatchPos = 0;
  // Go match the pattern Count times. Majority of patterns only match with
  // count 1 though.
  assert(Pat.getCount() != 0 && "pattern count can not be zero");
  for (int i = 1; i <= Pat.getCount(); i++) {
    StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
    size_t CurrentMatchLen;
    // get a match at current start point
    Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM);

    // report
    if (!MatchResult) {
      PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags,
                   MatchResult.takeError());
      return StringRef::npos;
    }
    // MatchPos is relative to MatchBuffer, i.e. to LastMatchEnd in Buffer.
    size_t MatchPos = *MatchResult;
    PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req,
               Diags);
    // On the first iteration LastMatchEnd still equals LastPos, so this is
    // the position of the first match relative to Buffer.
    if (i == 1)
      FirstMatchPos = LastPos + MatchPos;

    // move start point after the match
    LastMatchEnd += MatchPos + CurrentMatchLen;
  }
  // Full match len counts from first match pos.
  MatchLen = LastMatchEnd - FirstMatchPos;

  // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
  // or CHECK-NOT
  if (!IsLabelScanMode) {
    // Re-express the first match relative to LastPos; SkippedRegion is the
    // text between LastPos and the first match.
    size_t MatchPos = FirstMatchPos - LastPos;
    StringRef MatchBuffer = Buffer.substr(LastPos);
    StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);

    // If this check is a "CHECK-NEXT", verify that the previous match was on
    // the previous line (i.e. that there is one newline between them).
    if (CheckNext(SM, SkippedRegion)) {
      ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
                         Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
                         Diags, Req.Verbose);
      return StringRef::npos;
    }

    // If this check is a "CHECK-SAME", verify that the previous match was on
    // the same line (i.e. that there is no newline between them).
    if (CheckSame(SM, SkippedRegion)) {
      ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
                         Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
                         Diags, Req.Verbose);
      return StringRef::npos;
    }

    // If this match had "not strings", verify that they don't exist in the
    // skipped region.
    if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
      return StringRef::npos;
  }

  return FirstMatchPos;
}
1491
CheckNext(const SourceMgr & SM,StringRef Buffer) const1492 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1493 if (Pat.getCheckTy() != Check::CheckNext &&
1494 Pat.getCheckTy() != Check::CheckEmpty)
1495 return false;
1496
1497 Twine CheckName =
1498 Prefix +
1499 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
1500
1501 // Count the number of newlines between the previous match and this one.
1502 const char *FirstNewLine = nullptr;
1503 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1504
1505 if (NumNewLines == 0) {
1506 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1507 CheckName + ": is on the same line as previous match");
1508 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1509 "'next' match was here");
1510 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1511 "previous match ended here");
1512 return true;
1513 }
1514
1515 if (NumNewLines != 1) {
1516 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1517 CheckName +
1518 ": is not on the line after the previous match");
1519 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1520 "'next' match was here");
1521 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1522 "previous match ended here");
1523 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1524 "non-matching line after previous match is here");
1525 return true;
1526 }
1527
1528 return false;
1529 }
1530
CheckSame(const SourceMgr & SM,StringRef Buffer) const1531 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1532 if (Pat.getCheckTy() != Check::CheckSame)
1533 return false;
1534
1535 // Count the number of newlines between the previous match and this one.
1536 const char *FirstNewLine = nullptr;
1537 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1538
1539 if (NumNewLines != 0) {
1540 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1541 Prefix +
1542 "-SAME: is not on the same line as the previous match");
1543 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1544 "'next' match was here");
1545 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1546 "previous match ended here");
1547 return true;
1548 }
1549
1550 return false;
1551 }
1552
CheckNot(const SourceMgr & SM,StringRef Buffer,const std::vector<const Pattern * > & NotStrings,const FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags) const1553 bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1554 const std::vector<const Pattern *> &NotStrings,
1555 const FileCheckRequest &Req,
1556 std::vector<FileCheckDiag> *Diags) const {
1557 for (const Pattern *Pat : NotStrings) {
1558 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1559
1560 size_t MatchLen = 0;
1561 Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM);
1562
1563 if (!MatchResult) {
1564 PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
1565 Req.VerboseVerbose, Diags, MatchResult.takeError());
1566 continue;
1567 }
1568 size_t Pos = *MatchResult;
1569
1570 PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen,
1571 Req, Diags);
1572
1573 return true;
1574 }
1575
1576 return false;
1577 }
1578
CheckDag(const SourceMgr & SM,StringRef Buffer,std::vector<const Pattern * > & NotStrings,const FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags) const1579 size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1580 std::vector<const Pattern *> &NotStrings,
1581 const FileCheckRequest &Req,
1582 std::vector<FileCheckDiag> *Diags) const {
1583 if (DagNotStrings.empty())
1584 return 0;
1585
1586 // The start of the search range.
1587 size_t StartPos = 0;
1588
1589 struct MatchRange {
1590 size_t Pos;
1591 size_t End;
1592 };
1593 // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match
1594 // ranges are erased from this list once they are no longer in the search
1595 // range.
1596 std::list<MatchRange> MatchRanges;
1597
1598 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
1599 // group, so we don't use a range-based for loop here.
1600 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
1601 PatItr != PatEnd; ++PatItr) {
1602 const Pattern &Pat = *PatItr;
1603 assert((Pat.getCheckTy() == Check::CheckDAG ||
1604 Pat.getCheckTy() == Check::CheckNot) &&
1605 "Invalid CHECK-DAG or CHECK-NOT!");
1606
1607 if (Pat.getCheckTy() == Check::CheckNot) {
1608 NotStrings.push_back(&Pat);
1609 continue;
1610 }
1611
1612 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1613
1614 // CHECK-DAG always matches from the start.
1615 size_t MatchLen = 0, MatchPos = StartPos;
1616
1617 // Search for a match that doesn't overlap a previous match in this
1618 // CHECK-DAG group.
1619 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
1620 StringRef MatchBuffer = Buffer.substr(MatchPos);
1621 Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM);
1622 // With a group of CHECK-DAGs, a single mismatching means the match on
1623 // that group of CHECK-DAGs fails immediately.
1624 if (!MatchResult) {
1625 PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
1626 Req.VerboseVerbose, Diags, MatchResult.takeError());
1627 return StringRef::npos;
1628 }
1629 size_t MatchPosBuf = *MatchResult;
1630 // Re-calc it as the offset relative to the start of the original string.
1631 MatchPos += MatchPosBuf;
1632 if (Req.VerboseVerbose)
1633 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
1634 MatchLen, Req, Diags);
1635 MatchRange M{MatchPos, MatchPos + MatchLen};
1636 if (Req.AllowDeprecatedDagOverlap) {
1637 // We don't need to track all matches in this mode, so we just maintain
1638 // one match range that encompasses the current CHECK-DAG group's
1639 // matches.
1640 if (MatchRanges.empty())
1641 MatchRanges.insert(MatchRanges.end(), M);
1642 else {
1643 auto Block = MatchRanges.begin();
1644 Block->Pos = std::min(Block->Pos, M.Pos);
1645 Block->End = std::max(Block->End, M.End);
1646 }
1647 break;
1648 }
1649 // Iterate previous matches until overlapping match or insertion point.
1650 bool Overlap = false;
1651 for (; MI != ME; ++MI) {
1652 if (M.Pos < MI->End) {
1653 // !Overlap => New match has no overlap and is before this old match.
1654 // Overlap => New match overlaps this old match.
1655 Overlap = MI->Pos < M.End;
1656 break;
1657 }
1658 }
1659 if (!Overlap) {
1660 // Insert non-overlapping match into list.
1661 MatchRanges.insert(MI, M);
1662 break;
1663 }
1664 if (Req.VerboseVerbose) {
1665 // Due to their verbosity, we don't print verbose diagnostics here if
1666 // we're gathering them for a different rendering, but we always print
1667 // other diagnostics.
1668 if (!Diags) {
1669 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
1670 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
1671 SMRange OldRange(OldStart, OldEnd);
1672 SM.PrintMessage(OldStart, SourceMgr::DK_Note,
1673 "match discarded, overlaps earlier DAG match here",
1674 {OldRange});
1675 } else
1676 Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
1677 }
1678 MatchPos = MI->End;
1679 }
1680 if (!Req.VerboseVerbose)
1681 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
1682 MatchLen, Req, Diags);
1683
1684 // Handle the end of a CHECK-DAG group.
1685 if (std::next(PatItr) == PatEnd ||
1686 std::next(PatItr)->getCheckTy() == Check::CheckNot) {
1687 if (!NotStrings.empty()) {
1688 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
1689 // CHECK-DAG, verify that there are no 'not' strings occurred in that
1690 // region.
1691 StringRef SkippedRegion =
1692 Buffer.slice(StartPos, MatchRanges.begin()->Pos);
1693 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
1694 return StringRef::npos;
1695 // Clear "not strings".
1696 NotStrings.clear();
1697 }
1698 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
1699 // end of this CHECK-DAG group's match range.
1700 StartPos = MatchRanges.rbegin()->End;
1701 // Don't waste time checking for (impossible) overlaps before that.
1702 MatchRanges.clear();
1703 }
1704 }
1705
1706 return StartPos;
1707 }
1708
1709 // A check prefix must contain only alphanumeric, hyphens and underscores.
ValidateCheckPrefix(StringRef CheckPrefix)1710 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1711 static const Regex Validator("^[a-zA-Z0-9_-]*$");
1712 return Validator.match(CheckPrefix);
1713 }
1714
ValidateCheckPrefixes()1715 bool FileCheck::ValidateCheckPrefixes() {
1716 StringSet<> PrefixSet;
1717
1718 for (StringRef Prefix : Req.CheckPrefixes) {
1719 // Reject empty prefixes.
1720 if (Prefix == "")
1721 return false;
1722
1723 if (!PrefixSet.insert(Prefix).second)
1724 return false;
1725
1726 if (!ValidateCheckPrefix(Prefix))
1727 return false;
1728 }
1729
1730 return true;
1731 }
1732
buildCheckPrefixRegex()1733 Regex FileCheck::buildCheckPrefixRegex() {
1734 // I don't think there's a way to specify an initial value for cl::list,
1735 // so if nothing was specified, add the default
1736 if (Req.CheckPrefixes.empty())
1737 Req.CheckPrefixes.push_back("CHECK");
1738
1739 // We already validated the contents of CheckPrefixes so just concatenate
1740 // them as alternatives.
1741 SmallString<32> PrefixRegexStr;
1742 for (StringRef Prefix : Req.CheckPrefixes) {
1743 if (Prefix != Req.CheckPrefixes.front())
1744 PrefixRegexStr.push_back('|');
1745
1746 PrefixRegexStr.append(Prefix);
1747 }
1748
1749 return Regex(PrefixRegexStr);
1750 }
1751
defineCmdlineVariables(std::vector<std::string> & CmdlineDefines,SourceMgr & SM)1752 Error FileCheckPatternContext::defineCmdlineVariables(
1753 std::vector<std::string> &CmdlineDefines, SourceMgr &SM) {
1754 assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
1755 "Overriding defined variable with command-line variable definitions");
1756
1757 if (CmdlineDefines.empty())
1758 return Error::success();
1759
1760 // Create a string representing the vector of command-line definitions. Each
1761 // definition is on its own line and prefixed with a definition number to
1762 // clarify which definition a given diagnostic corresponds to.
1763 unsigned I = 0;
1764 Error Errs = Error::success();
1765 std::string CmdlineDefsDiag;
1766 SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices;
1767 for (StringRef CmdlineDef : CmdlineDefines) {
1768 std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str();
1769 size_t EqIdx = CmdlineDef.find('=');
1770 if (EqIdx == StringRef::npos) {
1771 CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0));
1772 continue;
1773 }
1774 // Numeric variable definition.
1775 if (CmdlineDef[0] == '#') {
1776 // Append a copy of the command-line definition adapted to use the same
1777 // format as in the input file to be able to reuse
1778 // parseNumericSubstitutionBlock.
1779 CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str();
1780 std::string SubstitutionStr = CmdlineDef;
1781 SubstitutionStr[EqIdx] = ':';
1782 CmdlineDefsIndices.push_back(
1783 std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size()));
1784 CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str();
1785 } else {
1786 CmdlineDefsDiag += DefPrefix;
1787 CmdlineDefsIndices.push_back(
1788 std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size()));
1789 CmdlineDefsDiag += (CmdlineDef + "\n").str();
1790 }
1791 }
1792
1793 // Create a buffer with fake command line content in order to display
1794 // parsing diagnostic with location information and point to the
1795 // global definition with invalid syntax.
1796 std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
1797 MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
1798 StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
1799 SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
1800
1801 for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) {
1802 StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first,
1803 CmdlineDefIndices.second);
1804 if (CmdlineDef.empty()) {
1805 Errs = joinErrors(
1806 std::move(Errs),
1807 ErrorDiagnostic::get(SM, CmdlineDef,
1808 "missing equal sign in global definition"));
1809 continue;
1810 }
1811
1812 // Numeric variable definition.
1813 if (CmdlineDef[0] == '#') {
1814 // Now parse the definition both to check that the syntax is correct and
1815 // to create the necessary class instance.
1816 StringRef CmdlineDefExpr = CmdlineDef.substr(1);
1817 Optional<NumericVariable *> DefinedNumericVariable;
1818 Expected<std::unique_ptr<ExpressionAST>> ExpressionASTResult =
1819 Pattern::parseNumericSubstitutionBlock(
1820 CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM);
1821 if (!ExpressionASTResult) {
1822 Errs = joinErrors(std::move(Errs), ExpressionASTResult.takeError());
1823 continue;
1824 }
1825 std::unique_ptr<ExpressionAST> ExpressionASTPointer =
1826 std::move(*ExpressionASTResult);
1827 // Now evaluate the expression whose value this variable should be set
1828 // to, since the expression of a command-line variable definition should
1829 // only use variables defined earlier on the command-line. If not, this
1830 // is an error and we report it.
1831 Expected<uint64_t> Value = ExpressionASTPointer->eval();
1832 if (!Value) {
1833 Errs = joinErrors(std::move(Errs), Value.takeError());
1834 continue;
1835 }
1836
1837 assert(DefinedNumericVariable && "No variable defined");
1838 (*DefinedNumericVariable)->setValue(*Value);
1839
1840 // Record this variable definition.
1841 GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] =
1842 *DefinedNumericVariable;
1843 } else {
1844 // String variable definition.
1845 std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
1846 StringRef CmdlineName = CmdlineNameVal.first;
1847 StringRef OrigCmdlineName = CmdlineName;
1848 Expected<Pattern::VariableProperties> ParseVarResult =
1849 Pattern::parseVariable(CmdlineName, SM);
1850 if (!ParseVarResult) {
1851 Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
1852 continue;
1853 }
1854 // Check that CmdlineName does not denote a pseudo variable is only
1855 // composed of the parsed numeric variable. This catches cases like
1856 // "FOO+2" in a "FOO+2=10" definition.
1857 if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
1858 Errs = joinErrors(std::move(Errs),
1859 ErrorDiagnostic::get(
1860 SM, OrigCmdlineName,
1861 "invalid name in string variable definition '" +
1862 OrigCmdlineName + "'"));
1863 continue;
1864 }
1865 StringRef Name = ParseVarResult->Name;
1866
1867 // Detect collisions between string and numeric variables when the former
1868 // is created later than the latter.
1869 if (GlobalNumericVariableTable.find(Name) !=
1870 GlobalNumericVariableTable.end()) {
1871 Errs = joinErrors(std::move(Errs),
1872 ErrorDiagnostic::get(SM, Name,
1873 "numeric variable with name '" +
1874 Name + "' already exists"));
1875 continue;
1876 }
1877 GlobalVariableTable.insert(CmdlineNameVal);
1878 // Mark the string variable as defined to detect collisions between
1879 // string and numeric variables in defineCmdlineVariables when the latter
1880 // is created later than the former. We cannot reuse GlobalVariableTable
1881 // for this by populating it with an empty string since we would then
1882 // lose the ability to detect the use of an undefined variable in
1883 // match().
1884 DefinedVariableTable[Name] = true;
1885 }
1886 }
1887
1888 return Errs;
1889 }
1890
clearLocalVars()1891 void FileCheckPatternContext::clearLocalVars() {
1892 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars;
1893 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable)
1894 if (Var.first()[0] != '$')
1895 LocalPatternVars.push_back(Var.first());
1896
1897 // Numeric substitution reads the value of a variable directly, not via
1898 // GlobalNumericVariableTable. Therefore, we clear local variables by
1899 // clearing their value which will lead to a numeric substitution failure. We
1900 // also mark the variable for removal from GlobalNumericVariableTable since
1901 // this is what defineCmdlineVariables checks to decide that no global
1902 // variable has been defined.
1903 for (const auto &Var : GlobalNumericVariableTable)
1904 if (Var.first()[0] != '$') {
1905 Var.getValue()->clearValue();
1906 LocalNumericVars.push_back(Var.first());
1907 }
1908
1909 for (const auto &Var : LocalPatternVars)
1910 GlobalVariableTable.erase(Var);
1911 for (const auto &Var : LocalNumericVars)
1912 GlobalNumericVariableTable.erase(Var);
1913 }
1914
checkInput(SourceMgr & SM,StringRef Buffer,std::vector<FileCheckDiag> * Diags)1915 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer,
1916 std::vector<FileCheckDiag> *Diags) {
1917 bool ChecksFailed = false;
1918
1919 unsigned i = 0, j = 0, e = CheckStrings->size();
1920 while (true) {
1921 StringRef CheckRegion;
1922 if (j == e) {
1923 CheckRegion = Buffer;
1924 } else {
1925 const FileCheckString &CheckLabelStr = (*CheckStrings)[j];
1926 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1927 ++j;
1928 continue;
1929 }
1930
1931 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1932 size_t MatchLabelLen = 0;
1933 size_t MatchLabelPos =
1934 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags);
1935 if (MatchLabelPos == StringRef::npos)
1936 // Immediately bail if CHECK-LABEL fails, nothing else we can do.
1937 return false;
1938
1939 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1940 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1941 ++j;
1942 }
1943
1944 // Do not clear the first region as it's the one before the first
1945 // CHECK-LABEL and it would clear variables defined on the command-line
1946 // before they get used.
1947 if (i != 0 && Req.EnableVarScope)
1948 PatternContext->clearLocalVars();
1949
1950 for (; i != j; ++i) {
1951 const FileCheckString &CheckStr = (*CheckStrings)[i];
1952
1953 // Check each string within the scanned region, including a second check
1954 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1955 size_t MatchLen = 0;
1956 size_t MatchPos =
1957 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags);
1958
1959 if (MatchPos == StringRef::npos) {
1960 ChecksFailed = true;
1961 i = j;
1962 break;
1963 }
1964
1965 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1966 }
1967
1968 if (j == e)
1969 break;
1970 }
1971
1972 // Success if no checks failed.
1973 return !ChecksFailed;
1974 }
1975