1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// FileCheck does a line-by-line check of a file that validates whether it
10 // contains the expected content. This is useful for regression tests etc.
11 //
12 // This file implements most of the API that will be used by the FileCheck utility
13 // as well as various unittests.
14 //===----------------------------------------------------------------------===//
15
16 #include "llvm/Support/FileCheck.h"
17 #include "FileCheckImpl.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/Support/CheckedArithmetic.h"
21 #include "llvm/Support/FormatVariadic.h"
22 #include <cstdint>
23 #include <list>
24 #include <tuple>
25 #include <utility>
26
27 using namespace llvm;
28
toString() const29 StringRef ExpressionFormat::toString() const {
30 switch (Value) {
31 case Kind::NoFormat:
32 return StringRef("<none>");
33 case Kind::Unsigned:
34 return StringRef("%u");
35 case Kind::Signed:
36 return StringRef("%d");
37 case Kind::HexUpper:
38 return StringRef("%X");
39 case Kind::HexLower:
40 return StringRef("%x");
41 }
42 llvm_unreachable("unknown expression format");
43 }
44
getWildcardRegex() const45 Expected<StringRef> ExpressionFormat::getWildcardRegex() const {
46 switch (Value) {
47 case Kind::Unsigned:
48 return StringRef("[0-9]+");
49 case Kind::Signed:
50 return StringRef("-?[0-9]+");
51 case Kind::HexUpper:
52 return StringRef("[0-9A-F]+");
53 case Kind::HexLower:
54 return StringRef("[0-9a-f]+");
55 default:
56 return createStringError(std::errc::invalid_argument,
57 "trying to match value with invalid format");
58 }
59 }
60
61 Expected<std::string>
getMatchingString(ExpressionValue IntegerValue) const62 ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const {
63 if (Value == Kind::Signed) {
64 Expected<int64_t> SignedValue = IntegerValue.getSignedValue();
65 if (!SignedValue)
66 return SignedValue.takeError();
67 return itostr(*SignedValue);
68 }
69
70 Expected<uint64_t> UnsignedValue = IntegerValue.getUnsignedValue();
71 if (!UnsignedValue)
72 return UnsignedValue.takeError();
73 switch (Value) {
74 case Kind::Unsigned:
75 return utostr(*UnsignedValue);
76 case Kind::HexUpper:
77 return utohexstr(*UnsignedValue, /*LowerCase=*/false);
78 case Kind::HexLower:
79 return utohexstr(*UnsignedValue, /*LowerCase=*/true);
80 default:
81 return createStringError(std::errc::invalid_argument,
82 "trying to match value with invalid format");
83 }
84 }
85
86 Expected<ExpressionValue>
valueFromStringRepr(StringRef StrVal,const SourceMgr & SM) const87 ExpressionFormat::valueFromStringRepr(StringRef StrVal,
88 const SourceMgr &SM) const {
89 bool ValueIsSigned = Value == Kind::Signed;
90 StringRef OverflowErrorStr = "unable to represent numeric value";
91 if (ValueIsSigned) {
92 int64_t SignedValue;
93
94 if (StrVal.getAsInteger(10, SignedValue))
95 return ErrorDiagnostic::get(SM, StrVal, OverflowErrorStr);
96
97 return ExpressionValue(SignedValue);
98 }
99
100 bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower;
101 uint64_t UnsignedValue;
102 if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue))
103 return ErrorDiagnostic::get(SM, StrVal, OverflowErrorStr);
104
105 return ExpressionValue(UnsignedValue);
106 }
107
// Reinterprets the bit pattern of \p UnsignedValue as a signed 64-bit integer.
//
// A plain cast is implementation-defined when the unsigned value does not fit
// in the signed type, and punning through a union breaks the type aliasing
// rules, hence the byte-wise copy.
static int64_t getAsSigned(uint64_t UnsignedValue) {
  int64_t Reinterpreted = 0;
  memcpy(&Reinterpreted, &UnsignedValue, sizeof(Reinterpreted));
  return Reinterpreted;
}
117
getSignedValue() const118 Expected<int64_t> ExpressionValue::getSignedValue() const {
119 if (Negative)
120 return getAsSigned(Value);
121
122 if (Value > (uint64_t)std::numeric_limits<int64_t>::max())
123 return make_error<OverflowError>();
124
125 // Value is in the representable range of int64_t so we can use cast.
126 return static_cast<int64_t>(Value);
127 }
128
getUnsignedValue() const129 Expected<uint64_t> ExpressionValue::getUnsignedValue() const {
130 if (Negative)
131 return make_error<OverflowError>();
132
133 return Value;
134 }
135
getAbsolute() const136 ExpressionValue ExpressionValue::getAbsolute() const {
137 if (!Negative)
138 return *this;
139
140 int64_t SignedValue = getAsSigned(Value);
141 int64_t MaxInt64 = std::numeric_limits<int64_t>::max();
142 // Absolute value can be represented as int64_t.
143 if (SignedValue >= -MaxInt64)
144 return ExpressionValue(-getAsSigned(Value));
145
146 // -X == -(max int64_t + Rem), negate each component independently.
147 SignedValue += MaxInt64;
148 uint64_t RemainingValueAbsolute = -SignedValue;
149 return ExpressionValue(MaxInt64 + RemainingValueAbsolute);
150 }
151
operator +(const ExpressionValue & LeftOperand,const ExpressionValue & RightOperand)152 Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand,
153 const ExpressionValue &RightOperand) {
154 if (LeftOperand.isNegative() && RightOperand.isNegative()) {
155 int64_t LeftValue = cantFail(LeftOperand.getSignedValue());
156 int64_t RightValue = cantFail(RightOperand.getSignedValue());
157 Optional<int64_t> Result = checkedAdd<int64_t>(LeftValue, RightValue);
158 if (!Result)
159 return make_error<OverflowError>();
160
161 return ExpressionValue(*Result);
162 }
163
164 // (-A) + B == B - A.
165 if (LeftOperand.isNegative())
166 return RightOperand - LeftOperand.getAbsolute();
167
168 // A + (-B) == A - B.
169 if (RightOperand.isNegative())
170 return LeftOperand - RightOperand.getAbsolute();
171
172 // Both values are positive at this point.
173 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
174 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
175 Optional<uint64_t> Result =
176 checkedAddUnsigned<uint64_t>(LeftValue, RightValue);
177 if (!Result)
178 return make_error<OverflowError>();
179
180 return ExpressionValue(*Result);
181 }
182
operator -(const ExpressionValue & LeftOperand,const ExpressionValue & RightOperand)183 Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand,
184 const ExpressionValue &RightOperand) {
185 // Result will be negative and thus might underflow.
186 if (LeftOperand.isNegative() && !RightOperand.isNegative()) {
187 int64_t LeftValue = cantFail(LeftOperand.getSignedValue());
188 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
189 // Result <= -1 - (max int64_t) which overflows on 1- and 2-complement.
190 if (RightValue > (uint64_t)std::numeric_limits<int64_t>::max())
191 return make_error<OverflowError>();
192 Optional<int64_t> Result =
193 checkedSub(LeftValue, static_cast<int64_t>(RightValue));
194 if (!Result)
195 return make_error<OverflowError>();
196
197 return ExpressionValue(*Result);
198 }
199
200 // (-A) - (-B) == B - A.
201 if (LeftOperand.isNegative())
202 return RightOperand.getAbsolute() - LeftOperand.getAbsolute();
203
204 // A - (-B) == A + B.
205 if (RightOperand.isNegative())
206 return LeftOperand + RightOperand.getAbsolute();
207
208 // Both values are positive at this point.
209 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
210 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
211 if (LeftValue >= RightValue)
212 return ExpressionValue(LeftValue - RightValue);
213 else {
214 uint64_t AbsoluteDifference = RightValue - LeftValue;
215 uint64_t MaxInt64 = std::numeric_limits<int64_t>::max();
216 // Value might underflow.
217 if (AbsoluteDifference > MaxInt64) {
218 AbsoluteDifference -= MaxInt64;
219 int64_t Result = -MaxInt64;
220 int64_t MinInt64 = std::numeric_limits<int64_t>::min();
221 // Underflow, tested by:
222 // abs(Result + (max int64_t)) > abs((min int64_t) + (max int64_t))
223 if (AbsoluteDifference > static_cast<uint64_t>(-(MinInt64 - Result)))
224 return make_error<OverflowError>();
225 Result -= static_cast<int64_t>(AbsoluteDifference);
226 return ExpressionValue(Result);
227 }
228
229 return ExpressionValue(-static_cast<int64_t>(AbsoluteDifference));
230 }
231 }
232
operator *(const ExpressionValue & LeftOperand,const ExpressionValue & RightOperand)233 Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand,
234 const ExpressionValue &RightOperand) {
235 // -A * -B == A * B
236 if (LeftOperand.isNegative() && RightOperand.isNegative())
237 return LeftOperand.getAbsolute() * RightOperand.getAbsolute();
238
239 // A * -B == -B * A
240 if (RightOperand.isNegative())
241 return RightOperand * LeftOperand;
242
243 assert(!RightOperand.isNegative() && "Unexpected negative operand!");
244
245 // Result will be negative and can underflow.
246 if (LeftOperand.isNegative()) {
247 auto Result = LeftOperand.getAbsolute() * RightOperand.getAbsolute();
248 if (!Result)
249 return Result;
250
251 return ExpressionValue(0) - *Result;
252 }
253
254 // Result will be positive and can overflow.
255 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
256 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
257 Optional<uint64_t> Result =
258 checkedMulUnsigned<uint64_t>(LeftValue, RightValue);
259 if (!Result)
260 return make_error<OverflowError>();
261
262 return ExpressionValue(*Result);
263 }
264
operator /(const ExpressionValue & LeftOperand,const ExpressionValue & RightOperand)265 Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand,
266 const ExpressionValue &RightOperand) {
267 // -A / -B == A / B
268 if (LeftOperand.isNegative() && RightOperand.isNegative())
269 return LeftOperand.getAbsolute() / RightOperand.getAbsolute();
270
271 // Check for divide by zero.
272 if (RightOperand == ExpressionValue(0))
273 return make_error<OverflowError>();
274
275 // Result will be negative and can underflow.
276 if (LeftOperand.isNegative() || RightOperand.isNegative())
277 return ExpressionValue(0) -
278 cantFail(LeftOperand.getAbsolute() / RightOperand.getAbsolute());
279
280 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
281 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
282 return ExpressionValue(LeftValue / RightValue);
283 }
284
max(const ExpressionValue & LeftOperand,const ExpressionValue & RightOperand)285 Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand,
286 const ExpressionValue &RightOperand) {
287 if (LeftOperand.isNegative() && RightOperand.isNegative()) {
288 int64_t LeftValue = cantFail(LeftOperand.getSignedValue());
289 int64_t RightValue = cantFail(RightOperand.getSignedValue());
290 return ExpressionValue(std::max(LeftValue, RightValue));
291 }
292
293 if (!LeftOperand.isNegative() && !RightOperand.isNegative()) {
294 uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
295 uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
296 return ExpressionValue(std::max(LeftValue, RightValue));
297 }
298
299 if (LeftOperand.isNegative())
300 return RightOperand;
301
302 return LeftOperand;
303 }
304
min(const ExpressionValue & LeftOperand,const ExpressionValue & RightOperand)305 Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand,
306 const ExpressionValue &RightOperand) {
307 if (cantFail(max(LeftOperand, RightOperand)) == LeftOperand)
308 return RightOperand;
309
310 return LeftOperand;
311 }
312
eval() const313 Expected<ExpressionValue> NumericVariableUse::eval() const {
314 Optional<ExpressionValue> Value = Variable->getValue();
315 if (Value)
316 return *Value;
317
318 return make_error<UndefVarError>(getExpressionStr());
319 }
320
eval() const321 Expected<ExpressionValue> BinaryOperation::eval() const {
322 Expected<ExpressionValue> LeftOp = LeftOperand->eval();
323 Expected<ExpressionValue> RightOp = RightOperand->eval();
324
325 // Bubble up any error (e.g. undefined variables) in the recursive
326 // evaluation.
327 if (!LeftOp || !RightOp) {
328 Error Err = Error::success();
329 if (!LeftOp)
330 Err = joinErrors(std::move(Err), LeftOp.takeError());
331 if (!RightOp)
332 Err = joinErrors(std::move(Err), RightOp.takeError());
333 return std::move(Err);
334 }
335
336 return EvalBinop(*LeftOp, *RightOp);
337 }
338
339 Expected<ExpressionFormat>
getImplicitFormat(const SourceMgr & SM) const340 BinaryOperation::getImplicitFormat(const SourceMgr &SM) const {
341 Expected<ExpressionFormat> LeftFormat = LeftOperand->getImplicitFormat(SM);
342 Expected<ExpressionFormat> RightFormat = RightOperand->getImplicitFormat(SM);
343 if (!LeftFormat || !RightFormat) {
344 Error Err = Error::success();
345 if (!LeftFormat)
346 Err = joinErrors(std::move(Err), LeftFormat.takeError());
347 if (!RightFormat)
348 Err = joinErrors(std::move(Err), RightFormat.takeError());
349 return std::move(Err);
350 }
351
352 if (*LeftFormat != ExpressionFormat::Kind::NoFormat &&
353 *RightFormat != ExpressionFormat::Kind::NoFormat &&
354 *LeftFormat != *RightFormat)
355 return ErrorDiagnostic::get(
356 SM, getExpressionStr(),
357 "implicit format conflict between '" + LeftOperand->getExpressionStr() +
358 "' (" + LeftFormat->toString() + ") and '" +
359 RightOperand->getExpressionStr() + "' (" + RightFormat->toString() +
360 "), need an explicit format specifier");
361
362 return *LeftFormat != ExpressionFormat::Kind::NoFormat ? *LeftFormat
363 : *RightFormat;
364 }
365
getResult() const366 Expected<std::string> NumericSubstitution::getResult() const {
367 assert(ExpressionPointer->getAST() != nullptr &&
368 "Substituting empty expression");
369 Expected<ExpressionValue> EvaluatedValue =
370 ExpressionPointer->getAST()->eval();
371 if (!EvaluatedValue)
372 return EvaluatedValue.takeError();
373 ExpressionFormat Format = ExpressionPointer->getFormat();
374 return Format.getMatchingString(*EvaluatedValue);
375 }
376
getResult() const377 Expected<std::string> StringSubstitution::getResult() const {
378 // Look up the value and escape it so that we can put it into the regex.
379 Expected<StringRef> VarVal = Context->getPatternVarValue(FromStr);
380 if (!VarVal)
381 return VarVal.takeError();
382 return Regex::escape(*VarVal);
383 }
384
isValidVarNameStart(char C)385 bool Pattern::isValidVarNameStart(char C) { return C == '_' || isAlpha(C); }
386
387 Expected<Pattern::VariableProperties>
parseVariable(StringRef & Str,const SourceMgr & SM)388 Pattern::parseVariable(StringRef &Str, const SourceMgr &SM) {
389 if (Str.empty())
390 return ErrorDiagnostic::get(SM, Str, "empty variable name");
391
392 size_t I = 0;
393 bool IsPseudo = Str[0] == '@';
394
395 // Global vars start with '$'.
396 if (Str[0] == '$' || IsPseudo)
397 ++I;
398
399 if (!isValidVarNameStart(Str[I++]))
400 return ErrorDiagnostic::get(SM, Str, "invalid variable name");
401
402 for (size_t E = Str.size(); I != E; ++I)
403 // Variable names are composed of alphanumeric characters and underscores.
404 if (Str[I] != '_' && !isAlnum(Str[I]))
405 break;
406
407 StringRef Name = Str.take_front(I);
408 Str = Str.substr(I);
409 return VariableProperties {Name, IsPseudo};
410 }
411
412 // StringRef holding all characters considered as horizontal whitespaces by
413 // FileCheck input canonicalization.
414 constexpr StringLiteral SpaceChars = " \t";
415
416 // Parsing helper function that strips the first character in S and returns it.
popFront(StringRef & S)417 static char popFront(StringRef &S) {
418 char C = S.front();
419 S = S.drop_front();
420 return C;
421 }
422
423 char OverflowError::ID = 0;
424 char UndefVarError::ID = 0;
425 char ErrorDiagnostic::ID = 0;
426 char NotFoundError::ID = 0;
427
parseNumericVariableDefinition(StringRef & Expr,FileCheckPatternContext * Context,Optional<size_t> LineNumber,ExpressionFormat ImplicitFormat,const SourceMgr & SM)428 Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
429 StringRef &Expr, FileCheckPatternContext *Context,
430 Optional<size_t> LineNumber, ExpressionFormat ImplicitFormat,
431 const SourceMgr &SM) {
432 Expected<VariableProperties> ParseVarResult = parseVariable(Expr, SM);
433 if (!ParseVarResult)
434 return ParseVarResult.takeError();
435 StringRef Name = ParseVarResult->Name;
436
437 if (ParseVarResult->IsPseudo)
438 return ErrorDiagnostic::get(
439 SM, Name, "definition of pseudo numeric variable unsupported");
440
441 // Detect collisions between string and numeric variables when the latter
442 // is created later than the former.
443 if (Context->DefinedVariableTable.find(Name) !=
444 Context->DefinedVariableTable.end())
445 return ErrorDiagnostic::get(
446 SM, Name, "string variable with name '" + Name + "' already exists");
447
448 Expr = Expr.ltrim(SpaceChars);
449 if (!Expr.empty())
450 return ErrorDiagnostic::get(
451 SM, Expr, "unexpected characters after numeric variable name");
452
453 NumericVariable *DefinedNumericVariable;
454 auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
455 if (VarTableIter != Context->GlobalNumericVariableTable.end()) {
456 DefinedNumericVariable = VarTableIter->second;
457 if (DefinedNumericVariable->getImplicitFormat() != ImplicitFormat)
458 return ErrorDiagnostic::get(
459 SM, Expr, "format different from previous variable definition");
460 } else
461 DefinedNumericVariable =
462 Context->makeNumericVariable(Name, ImplicitFormat, LineNumber);
463
464 return DefinedNumericVariable;
465 }
466
parseNumericVariableUse(StringRef Name,bool IsPseudo,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)467 Expected<std::unique_ptr<NumericVariableUse>> Pattern::parseNumericVariableUse(
468 StringRef Name, bool IsPseudo, Optional<size_t> LineNumber,
469 FileCheckPatternContext *Context, const SourceMgr &SM) {
470 if (IsPseudo && !Name.equals("@LINE"))
471 return ErrorDiagnostic::get(
472 SM, Name, "invalid pseudo numeric variable '" + Name + "'");
473
474 // Numeric variable definitions and uses are parsed in the order in which
475 // they appear in the CHECK patterns. For each definition, the pointer to the
476 // class instance of the corresponding numeric variable definition is stored
477 // in GlobalNumericVariableTable in parsePattern. Therefore, if the pointer
478 // we get below is null, it means no such variable was defined before. When
479 // that happens, we create a dummy variable so that parsing can continue. All
480 // uses of undefined variables, whether string or numeric, are then diagnosed
481 // in printSubstitutions() after failing to match.
482 auto VarTableIter = Context->GlobalNumericVariableTable.find(Name);
483 NumericVariable *NumericVariable;
484 if (VarTableIter != Context->GlobalNumericVariableTable.end())
485 NumericVariable = VarTableIter->second;
486 else {
487 NumericVariable = Context->makeNumericVariable(
488 Name, ExpressionFormat(ExpressionFormat::Kind::Unsigned));
489 Context->GlobalNumericVariableTable[Name] = NumericVariable;
490 }
491
492 Optional<size_t> DefLineNumber = NumericVariable->getDefLineNumber();
493 if (DefLineNumber && LineNumber && *DefLineNumber == *LineNumber)
494 return ErrorDiagnostic::get(
495 SM, Name,
496 "numeric variable '" + Name +
497 "' defined earlier in the same CHECK directive");
498
499 return std::make_unique<NumericVariableUse>(Name, NumericVariable);
500 }
501
parseNumericOperand(StringRef & Expr,AllowedOperand AO,bool MaybeInvalidConstraint,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)502 Expected<std::unique_ptr<ExpressionAST>> Pattern::parseNumericOperand(
503 StringRef &Expr, AllowedOperand AO, bool MaybeInvalidConstraint,
504 Optional<size_t> LineNumber, FileCheckPatternContext *Context,
505 const SourceMgr &SM) {
506 if (Expr.startswith("(")) {
507 if (AO != AllowedOperand::Any)
508 return ErrorDiagnostic::get(
509 SM, Expr, "parenthesized expression not permitted here");
510 return parseParenExpr(Expr, LineNumber, Context, SM);
511 }
512
513 if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) {
514 // Try to parse as a numeric variable use.
515 Expected<Pattern::VariableProperties> ParseVarResult =
516 parseVariable(Expr, SM);
517 if (ParseVarResult) {
518 // Try to parse a function call.
519 if (Expr.ltrim(SpaceChars).startswith("(")) {
520 if (AO != AllowedOperand::Any)
521 return ErrorDiagnostic::get(SM, ParseVarResult->Name,
522 "unexpected function call");
523
524 return parseCallExpr(Expr, ParseVarResult->Name, LineNumber, Context,
525 SM);
526 }
527
528 return parseNumericVariableUse(ParseVarResult->Name,
529 ParseVarResult->IsPseudo, LineNumber,
530 Context, SM);
531 }
532
533 if (AO == AllowedOperand::LineVar)
534 return ParseVarResult.takeError();
535 // Ignore the error and retry parsing as a literal.
536 consumeError(ParseVarResult.takeError());
537 }
538
539 // Otherwise, parse it as a literal.
540 int64_t SignedLiteralValue;
541 uint64_t UnsignedLiteralValue;
542 StringRef SaveExpr = Expr;
543 // Accept both signed and unsigned literal, default to signed literal.
544 if (!Expr.consumeInteger((AO == AllowedOperand::LegacyLiteral) ? 10 : 0,
545 UnsignedLiteralValue))
546 return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
547 UnsignedLiteralValue);
548 Expr = SaveExpr;
549 if (AO == AllowedOperand::Any && !Expr.consumeInteger(0, SignedLiteralValue))
550 return std::make_unique<ExpressionLiteral>(SaveExpr.drop_back(Expr.size()),
551 SignedLiteralValue);
552
553 return ErrorDiagnostic::get(
554 SM, Expr,
555 Twine("invalid ") +
556 (MaybeInvalidConstraint ? "matching constraint or " : "") +
557 "operand format");
558 }
559
560 Expected<std::unique_ptr<ExpressionAST>>
parseParenExpr(StringRef & Expr,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)561 Pattern::parseParenExpr(StringRef &Expr, Optional<size_t> LineNumber,
562 FileCheckPatternContext *Context, const SourceMgr &SM) {
563 Expr = Expr.ltrim(SpaceChars);
564 assert(Expr.startswith("("));
565
566 // Parse right operand.
567 Expr.consume_front("(");
568 Expr = Expr.ltrim(SpaceChars);
569 if (Expr.empty())
570 return ErrorDiagnostic::get(SM, Expr, "missing operand in expression");
571
572 // Note: parseNumericOperand handles nested opening parentheses.
573 Expected<std::unique_ptr<ExpressionAST>> SubExprResult = parseNumericOperand(
574 Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
575 Context, SM);
576 Expr = Expr.ltrim(SpaceChars);
577 while (SubExprResult && !Expr.empty() && !Expr.startswith(")")) {
578 StringRef OrigExpr = Expr;
579 SubExprResult = parseBinop(OrigExpr, Expr, std::move(*SubExprResult), false,
580 LineNumber, Context, SM);
581 Expr = Expr.ltrim(SpaceChars);
582 }
583 if (!SubExprResult)
584 return SubExprResult;
585
586 if (!Expr.consume_front(")")) {
587 return ErrorDiagnostic::get(SM, Expr,
588 "missing ')' at end of nested expression");
589 }
590 return SubExprResult;
591 }
592
593 Expected<std::unique_ptr<ExpressionAST>>
parseBinop(StringRef Expr,StringRef & RemainingExpr,std::unique_ptr<ExpressionAST> LeftOp,bool IsLegacyLineExpr,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)594 Pattern::parseBinop(StringRef Expr, StringRef &RemainingExpr,
595 std::unique_ptr<ExpressionAST> LeftOp,
596 bool IsLegacyLineExpr, Optional<size_t> LineNumber,
597 FileCheckPatternContext *Context, const SourceMgr &SM) {
598 RemainingExpr = RemainingExpr.ltrim(SpaceChars);
599 if (RemainingExpr.empty())
600 return std::move(LeftOp);
601
602 // Check if this is a supported operation and select a function to perform
603 // it.
604 SMLoc OpLoc = SMLoc::getFromPointer(RemainingExpr.data());
605 char Operator = popFront(RemainingExpr);
606 binop_eval_t EvalBinop;
607 switch (Operator) {
608 case '+':
609 EvalBinop = operator+;
610 break;
611 case '-':
612 EvalBinop = operator-;
613 break;
614 default:
615 return ErrorDiagnostic::get(
616 SM, OpLoc, Twine("unsupported operation '") + Twine(Operator) + "'");
617 }
618
619 // Parse right operand.
620 RemainingExpr = RemainingExpr.ltrim(SpaceChars);
621 if (RemainingExpr.empty())
622 return ErrorDiagnostic::get(SM, RemainingExpr,
623 "missing operand in expression");
624 // The second operand in a legacy @LINE expression is always a literal.
625 AllowedOperand AO =
626 IsLegacyLineExpr ? AllowedOperand::LegacyLiteral : AllowedOperand::Any;
627 Expected<std::unique_ptr<ExpressionAST>> RightOpResult =
628 parseNumericOperand(RemainingExpr, AO, /*MaybeInvalidConstraint=*/false,
629 LineNumber, Context, SM);
630 if (!RightOpResult)
631 return RightOpResult;
632
633 Expr = Expr.drop_back(RemainingExpr.size());
634 return std::make_unique<BinaryOperation>(Expr, EvalBinop, std::move(LeftOp),
635 std::move(*RightOpResult));
636 }
637
638 Expected<std::unique_ptr<ExpressionAST>>
parseCallExpr(StringRef & Expr,StringRef FuncName,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)639 Pattern::parseCallExpr(StringRef &Expr, StringRef FuncName,
640 Optional<size_t> LineNumber,
641 FileCheckPatternContext *Context, const SourceMgr &SM) {
642 Expr = Expr.ltrim(SpaceChars);
643 assert(Expr.startswith("("));
644
645 auto OptFunc = StringSwitch<Optional<binop_eval_t>>(FuncName)
646 .Case("add", operator+)
647 .Case("div", operator/)
648 .Case("max", max)
649 .Case("min", min)
650 .Case("mul", operator*)
651 .Case("sub", operator-)
652 .Default(None);
653
654 if (!OptFunc)
655 return ErrorDiagnostic::get(
656 SM, FuncName, Twine("call to undefined function '") + FuncName + "'");
657
658 Expr.consume_front("(");
659 Expr = Expr.ltrim(SpaceChars);
660
661 // Parse call arguments, which are comma separated.
662 SmallVector<std::unique_ptr<ExpressionAST>, 4> Args;
663 while (!Expr.empty() && !Expr.startswith(")")) {
664 if (Expr.startswith(","))
665 return ErrorDiagnostic::get(SM, Expr, "missing argument");
666
667 // Parse the argument, which is an arbitary expression.
668 StringRef OuterBinOpExpr = Expr;
669 Expected<std::unique_ptr<ExpressionAST>> Arg = parseNumericOperand(
670 Expr, AllowedOperand::Any, /*MaybeInvalidConstraint=*/false, LineNumber,
671 Context, SM);
672 while (Arg && !Expr.empty()) {
673 Expr = Expr.ltrim(SpaceChars);
674 // Have we reached an argument terminator?
675 if (Expr.startswith(",") || Expr.startswith(")"))
676 break;
677
678 // Arg = Arg <op> <expr>
679 Arg = parseBinop(OuterBinOpExpr, Expr, std::move(*Arg), false, LineNumber,
680 Context, SM);
681 }
682
683 // Prefer an expression error over a generic invalid argument message.
684 if (!Arg)
685 return Arg.takeError();
686 Args.push_back(std::move(*Arg));
687
688 // Have we parsed all available arguments?
689 Expr = Expr.ltrim(SpaceChars);
690 if (!Expr.consume_front(","))
691 break;
692
693 Expr = Expr.ltrim(SpaceChars);
694 if (Expr.startswith(")"))
695 return ErrorDiagnostic::get(SM, Expr, "missing argument");
696 }
697
698 if (!Expr.consume_front(")"))
699 return ErrorDiagnostic::get(SM, Expr,
700 "missing ')' at end of call expression");
701
702 const unsigned NumArgs = Args.size();
703 if (NumArgs == 2)
704 return std::make_unique<BinaryOperation>(Expr, *OptFunc, std::move(Args[0]),
705 std::move(Args[1]));
706
707 // TODO: Support more than binop_eval_t.
708 return ErrorDiagnostic::get(SM, FuncName,
709 Twine("function '") + FuncName +
710 Twine("' takes 2 arguments but ") +
711 Twine(NumArgs) + " given");
712 }
713
parseNumericSubstitutionBlock(StringRef Expr,Optional<NumericVariable * > & DefinedNumericVariable,bool IsLegacyLineExpr,Optional<size_t> LineNumber,FileCheckPatternContext * Context,const SourceMgr & SM)714 Expected<std::unique_ptr<Expression>> Pattern::parseNumericSubstitutionBlock(
715 StringRef Expr, Optional<NumericVariable *> &DefinedNumericVariable,
716 bool IsLegacyLineExpr, Optional<size_t> LineNumber,
717 FileCheckPatternContext *Context, const SourceMgr &SM) {
718 std::unique_ptr<ExpressionAST> ExpressionASTPointer = nullptr;
719 StringRef DefExpr = StringRef();
720 DefinedNumericVariable = None;
721 ExpressionFormat ExplicitFormat = ExpressionFormat();
722
723 // Parse format specifier (NOTE: ',' is also an argument seperator).
724 size_t FormatSpecEnd = Expr.find(',');
725 size_t FunctionStart = Expr.find('(');
726 if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) {
727 Expr = Expr.ltrim(SpaceChars);
728 if (!Expr.consume_front("%"))
729 return ErrorDiagnostic::get(
730 SM, Expr, "invalid matching format specification in expression");
731
732 // Check for unknown matching format specifier and set matching format in
733 // class instance representing this expression.
734 SMLoc fmtloc = SMLoc::getFromPointer(Expr.data());
735 switch (popFront(Expr)) {
736 case 'u':
737 ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::Unsigned);
738 break;
739 case 'd':
740 ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::Signed);
741 break;
742 case 'x':
743 ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexLower);
744 break;
745 case 'X':
746 ExplicitFormat = ExpressionFormat(ExpressionFormat::Kind::HexUpper);
747 break;
748 default:
749 return ErrorDiagnostic::get(SM, fmtloc,
750 "invalid format specifier in expression");
751 }
752
753 Expr = Expr.ltrim(SpaceChars);
754 if (!Expr.consume_front(","))
755 return ErrorDiagnostic::get(
756 SM, Expr, "invalid matching format specification in expression");
757 }
758
759 // Save variable definition expression if any.
760 size_t DefEnd = Expr.find(':');
761 if (DefEnd != StringRef::npos) {
762 DefExpr = Expr.substr(0, DefEnd);
763 Expr = Expr.substr(DefEnd + 1);
764 }
765
766 // Parse matching constraint.
767 Expr = Expr.ltrim(SpaceChars);
768 bool HasParsedValidConstraint = false;
769 if (Expr.consume_front("=="))
770 HasParsedValidConstraint = true;
771
772 // Parse the expression itself.
773 Expr = Expr.ltrim(SpaceChars);
774 if (Expr.empty()) {
775 if (HasParsedValidConstraint)
776 return ErrorDiagnostic::get(
777 SM, Expr, "empty numeric expression should not have a constraint");
778 } else {
779 Expr = Expr.rtrim(SpaceChars);
780 StringRef OuterBinOpExpr = Expr;
781 // The first operand in a legacy @LINE expression is always the @LINE
782 // pseudo variable.
783 AllowedOperand AO =
784 IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any;
785 Expected<std::unique_ptr<ExpressionAST>> ParseResult = parseNumericOperand(
786 Expr, AO, !HasParsedValidConstraint, LineNumber, Context, SM);
787 while (ParseResult && !Expr.empty()) {
788 ParseResult = parseBinop(OuterBinOpExpr, Expr, std::move(*ParseResult),
789 IsLegacyLineExpr, LineNumber, Context, SM);
790 // Legacy @LINE expressions only allow 2 operands.
791 if (ParseResult && IsLegacyLineExpr && !Expr.empty())
792 return ErrorDiagnostic::get(
793 SM, Expr,
794 "unexpected characters at end of expression '" + Expr + "'");
795 }
796 if (!ParseResult)
797 return ParseResult.takeError();
798 ExpressionASTPointer = std::move(*ParseResult);
799 }
800
801 // Select format of the expression, i.e. (i) its explicit format, if any,
802 // otherwise (ii) its implicit format, if any, otherwise (iii) the default
803 // format (unsigned). Error out in case of conflicting implicit format
804 // without explicit format.
805 ExpressionFormat Format;
806 if (ExplicitFormat)
807 Format = ExplicitFormat;
808 else if (ExpressionASTPointer) {
809 Expected<ExpressionFormat> ImplicitFormat =
810 ExpressionASTPointer->getImplicitFormat(SM);
811 if (!ImplicitFormat)
812 return ImplicitFormat.takeError();
813 Format = *ImplicitFormat;
814 }
815 if (!Format)
816 Format = ExpressionFormat(ExpressionFormat::Kind::Unsigned);
817
818 std::unique_ptr<Expression> ExpressionPointer =
819 std::make_unique<Expression>(std::move(ExpressionASTPointer), Format);
820
821 // Parse the numeric variable definition.
822 if (DefEnd != StringRef::npos) {
823 DefExpr = DefExpr.ltrim(SpaceChars);
824 Expected<NumericVariable *> ParseResult = parseNumericVariableDefinition(
825 DefExpr, Context, LineNumber, ExpressionPointer->getFormat(), SM);
826
827 if (!ParseResult)
828 return ParseResult.takeError();
829 DefinedNumericVariable = *ParseResult;
830 }
831
832 return std::move(ExpressionPointer);
833 }
834
/// Parses \p PatternStr, the pattern text of a check directive that uses
/// \p Prefix, and records the result in this Pattern: either FixedStr for a
/// plain fixed-string pattern, or RegExStr together with the variable
/// definitions and substitutions found in the pattern.
///
/// Errors are reported through \p SM (or logged to errs() for errors returned
/// by the variable and numeric-expression parsers).
///
/// \returns true if an error was diagnosed, false on success.
bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
                           SourceMgr &SM, const FileCheckRequest &Req) {
  // Full-line matching is never applied to CHECK-NOT patterns.
  bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
  IgnoreCase = Req.IgnoreCase;

  PatternLoc = SMLoc::getFromPointer(PatternStr.data());

  // Trailing blanks are kept only when both strict whitespace and full-line
  // matching are requested.
  if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
    // Ignore trailing whitespace.
    while (!PatternStr.empty() &&
           (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
      PatternStr = PatternStr.substr(0, PatternStr.size() - 1);

  // Check that there is something on the line.
  if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
    SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
                    "found empty check string with prefix '" + Prefix + ":'");
    return true;
  }

  // Conversely, a CHECK-EMPTY directive must not carry any pattern text.
  if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
    SM.PrintMessage(
        PatternLoc, SourceMgr::DK_Error,
        "found non-empty check string for empty check with prefix '" + Prefix +
            ":'");
    return true;
  }

  // CHECK-EMPTY always uses this fixed regex; the newline it consumes is
  // accounted for when match() computes the match range.
  if (CheckTy == Check::CheckEmpty) {
    RegExStr = "(\n$)";
    return false;
  }

  // Check to see if this is a fixed string, or if it has regex pieces.
  if (!MatchFullLinesHere &&
      (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
                                 PatternStr.find("[[") == StringRef::npos))) {
    FixedStr = PatternStr;
    return false;
  }

  // For full-line matching, anchor the regex at the start of the line and,
  // unless whitespace is significant, tolerate leading spaces.
  if (MatchFullLinesHere) {
    RegExStr += '^';
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
  }

  // Paren value #0 is for the fully matched string.  Any new parenthesized
  // values add from there.
  unsigned CurParen = 1;

  // Otherwise, there is at least one regex piece.  Build up the regex pattern
  // by escaping scary characters in fixed strings, building up one big regex.
  while (!PatternStr.empty()) {
    // RegEx matches.
    if (PatternStr.startswith("{{")) {
      // This is the start of a regex match.  Scan for the }}.
      size_t End = PatternStr.find("}}");
      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "found start of regex string with no end '}}'");
        return true;
      }

      // Enclose {{}} patterns in parens just like [[]] even though we're not
      // capturing the result for any purpose.  This is required in case the
      // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
      // want this to turn into: "abc(x|z)def" not "abcx|zdef".
      RegExStr += '(';
      ++CurParen;

      if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
        return true;
      RegExStr += ')';

      PatternStr = PatternStr.substr(End + 2);
      continue;
    }

    // String and numeric substitution blocks. Pattern substitution blocks come
    // in two forms: [[foo:.*]] and [[foo]]. The former matches .* (or some
    // other regex) and assigns it to the string variable 'foo'. The latter
    // substitutes foo's value. Numeric substitution blocks recognize the same
    // form as string ones, but start with a '#' sign after the double
    // brackets. They also accept a combined form which sets a numeric variable
    // to the evaluation of an expression. Both string and numeric variable
    // names must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*" to be
    // valid, as this helps catch some common errors.
    if (PatternStr.startswith("[[")) {
      StringRef UnparsedPatternStr = PatternStr.substr(2);
      // Find the closing bracket pair ending the match.  End is going to be an
      // offset relative to the beginning of the match string.
      size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
      StringRef MatchStr = UnparsedPatternStr.substr(0, End);
      bool IsNumBlock = MatchStr.consume_front("#");

      if (End == StringRef::npos) {
        SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                        SourceMgr::DK_Error,
                        "Invalid substitution block, no ]] found");
        return true;
      }
      // Strip the substitution block we are parsing. End points to the start
      // of the "]]" closing the expression so account for it in computing the
      // index of the first unparsed character.
      PatternStr = UnparsedPatternStr.substr(End + 2);

      bool IsDefinition = false;
      bool SubstNeeded = false;
      // Whether the substitution block is a legacy use of @LINE with string
      // substitution block syntax.
      bool IsLegacyLineExpr = false;
      StringRef DefName;
      StringRef SubstStr;
      StringRef MatchRegexp;
      // Offset in RegExStr at which a substitution's value gets inserted at
      // match time; bumped past the opening paren below for definitions.
      size_t SubstInsertIdx = RegExStr.size();

      // Parse string variable or legacy @LINE expression.
      if (!IsNumBlock) {
        size_t VarEndIdx = MatchStr.find(":");
        // Whitespace is rejected in the variable name part, i.e. before any
        // ':' separating the name from its regex.
        size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
        if (SpacePos != StringRef::npos) {
          SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
                          SourceMgr::DK_Error, "unexpected whitespace");
          return true;
        }

        // Get the name (e.g. "foo") and verify it is well formed.
        StringRef OrigMatchStr = MatchStr;
        Expected<Pattern::VariableProperties> ParseVarResult =
            parseVariable(MatchStr, SM);
        if (!ParseVarResult) {
          logAllUnhandledErrors(ParseVarResult.takeError(), errs());
          return true;
        }
        StringRef Name = ParseVarResult->Name;
        bool IsPseudo = ParseVarResult->IsPseudo;

        // A ':' in the block makes it a definition; otherwise it is a use.
        IsDefinition = (VarEndIdx != StringRef::npos);
        SubstNeeded = !IsDefinition;
        if (IsDefinition) {
          // Pseudo variables cannot be defined, and the name must be
          // immediately followed by the ':'.
          if ((IsPseudo || !MatchStr.consume_front(":"))) {
            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
                            SourceMgr::DK_Error,
                            "invalid name in string variable definition");
            return true;
          }

          // Detect collisions between string and numeric variables when the
          // former is created later than the latter.
          if (Context->GlobalNumericVariableTable.find(Name) !=
              Context->GlobalNumericVariableTable.end()) {
            SM.PrintMessage(
                SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
                "numeric variable with name '" + Name + "' already exists");
            return true;
          }
          DefName = Name;
          MatchRegexp = MatchStr;
        } else {
          if (IsPseudo) {
            // Use of a pseudo variable with string syntax: reparse the whole
            // block as a legacy @LINE numeric expression below.
            MatchStr = OrigMatchStr;
            IsLegacyLineExpr = IsNumBlock = true;
          } else
            SubstStr = Name;
        }
      }

      // Parse numeric substitution block.
      std::unique_ptr<Expression> ExpressionPointer;
      Optional<NumericVariable *> DefinedNumericVariable;
      if (IsNumBlock) {
        Expected<std::unique_ptr<Expression>> ParseResult =
            parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable,
                                          IsLegacyLineExpr, LineNumber, Context,
                                          SM);
        if (!ParseResult) {
          logAllUnhandledErrors(ParseResult.takeError(), errs());
          return true;
        }
        ExpressionPointer = std::move(*ParseResult);
        // A substitution is only needed when the block has an expression.
        SubstNeeded = ExpressionPointer->getAST() != nullptr;
        if (DefinedNumericVariable) {
          IsDefinition = true;
          DefName = (*DefinedNumericVariable)->getName();
        }
        if (SubstNeeded)
          SubstStr = MatchStr;
        else {
          // No expression to substitute: match any value of the block's
          // format using that format's wildcard regex.
          ExpressionFormat Format = ExpressionPointer->getFormat();
          MatchRegexp = cantFail(Format.getWildcardRegex());
        }
      }

      // Handle variable definition: [[<def>:(...)]] and [[#(...)<def>:(...)]].
      if (IsDefinition) {
        RegExStr += '(';
        ++SubstInsertIdx;

        if (IsNumBlock) {
          NumericVariableMatch NumericVariableDefinition = {
              *DefinedNumericVariable, CurParen};
          NumericVariableDefs[DefName] = NumericVariableDefinition;
          // This store is done here rather than in match() to allow
          // parseNumericVariableUse() to get the pointer to the class instance
          // of the right variable definition corresponding to a given numeric
          // variable use.
          Context->GlobalNumericVariableTable[DefName] =
              *DefinedNumericVariable;
        } else {
          VariableDefs[DefName] = CurParen;
          // Mark string variable as defined to detect collisions between
          // string and numeric variables in parseNumericVariableUse() and
          // defineCmdlineVariables() when the latter is created later than the
          // former. We cannot reuse GlobalVariableTable for this by populating
          // it with an empty string since we would then lose the ability to
          // detect the use of an undefined variable in match().
          Context->DefinedVariableTable[DefName] = true;
        }

        ++CurParen;
      }

      if (!MatchRegexp.empty() && AddRegExToRegEx(MatchRegexp, CurParen, SM))
        return true;

      if (IsDefinition)
        RegExStr += ')';

      // Handle substitutions: [[foo]] and [[#<foo expr>]].
      if (SubstNeeded) {
        // Handle substitution of string variables that were defined earlier on
        // the same line by emitting a backreference. Expressions do not
        // support substituting a numeric variable defined on the same line.
        if (!IsNumBlock && VariableDefs.find(SubstStr) != VariableDefs.end()) {
          unsigned CaptureParenGroup = VariableDefs[SubstStr];
          if (CaptureParenGroup < 1 || CaptureParenGroup > 9) {
            SM.PrintMessage(SMLoc::getFromPointer(SubstStr.data()),
                            SourceMgr::DK_Error,
                            "Can't back-reference more than 9 variables");
            return true;
          }
          AddBackrefToRegEx(CaptureParenGroup);
        } else {
          // Handle substitution of string variables ([[<var>]]) defined in
          // previous CHECK patterns, and substitution of expressions.
          Substitution *Substitution =
              IsNumBlock
                  ? Context->makeNumericSubstitution(
                        SubstStr, std::move(ExpressionPointer), SubstInsertIdx)
                  : Context->makeStringSubstitution(SubstStr, SubstInsertIdx);
          Substitutions.push_back(Substitution);
        }
      }
    }

    // Handle fixed string matches.
    // Find the end, which is the start of the next regex.
    size_t FixedMatchEnd = PatternStr.find("{{");
    FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
    RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
    PatternStr = PatternStr.substr(FixedMatchEnd);
  }

  // For full-line matching, tolerate trailing spaces (unless whitespace is
  // significant) and anchor the regex at the end of the line.
  if (MatchFullLinesHere) {
    if (!Req.NoCanonicalizeWhiteSpace)
      RegExStr += " *";
    RegExStr += '$';
  }

  return false;
}
1108
AddRegExToRegEx(StringRef RS,unsigned & CurParen,SourceMgr & SM)1109 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
1110 Regex R(RS);
1111 std::string Error;
1112 if (!R.isValid(Error)) {
1113 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
1114 "invalid regex: " + Error);
1115 return true;
1116 }
1117
1118 RegExStr += RS.str();
1119 CurParen += R.getNumMatches();
1120 return false;
1121 }
1122
AddBackrefToRegEx(unsigned BackrefNum)1123 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
1124 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
1125 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
1126 RegExStr += Backref;
1127 }
1128
/// Matches this pattern against \p Buffer.
///
/// On success, stores the length of the match in \p MatchLen, records the
/// values of the string and numeric variables defined by this pattern in the
/// pattern context, and returns the offset of the match within \p Buffer.
///
/// \returns a NotFoundError if the pattern does not match, or the error
/// produced while evaluating a substitution or while converting a matched
/// numeric value.
Expected<size_t> Pattern::match(StringRef Buffer, size_t &MatchLen,
                                const SourceMgr &SM) const {
  // If this is the EOF pattern, match it immediately.
  if (CheckTy == Check::CheckEOF) {
    MatchLen = 0;
    return Buffer.size();
  }

  // If this is a fixed string pattern, just match it now.
  if (!FixedStr.empty()) {
    // Note that MatchLen is set even when the string is not found.
    MatchLen = FixedStr.size();
    size_t Pos =
        IgnoreCase ? Buffer.find_lower(FixedStr) : Buffer.find(FixedStr);
    if (Pos == StringRef::npos)
      return make_error<NotFoundError>();
    return Pos;
  }

  // Regex match.

  // If there are substitutions, we need to create a temporary string with the
  // actual value.
  StringRef RegExToMatch = RegExStr;
  std::string TmpStr;
  if (!Substitutions.empty()) {
    TmpStr = RegExStr;
    // Give @LINE its value for this pattern before evaluating expressions.
    if (LineNumber)
      Context->LineVariable->setValue(ExpressionValue(*LineNumber));

    // Substitution indices are relative to RegExStr; InsertOffset accounts
    // for the text already inserted into TmpStr by earlier substitutions.
    size_t InsertOffset = 0;
    // Substitute all string variables and expressions whose values are only
    // now known. Use of string variables defined on the same line are handled
    // by back-references.
    for (const auto &Substitution : Substitutions) {
      // Substitute and check for failure (e.g. use of undefined variable).
      Expected<std::string> Value = Substitution->getResult();
      if (!Value) {
        // Convert to an ErrorDiagnostic to get location information. This is
        // done here rather than PrintNoMatch since now we know which
        // substitution block caused the overflow.
        Error Err =
            handleErrors(Value.takeError(), [&](const OverflowError &E) {
              return ErrorDiagnostic::get(SM, Substitution->getFromString(),
                                          "unable to substitute variable or "
                                          "numeric expression: overflow error");
            });
        return std::move(Err);
      }

      // Plop it into the regex at the adjusted offset.
      TmpStr.insert(TmpStr.begin() + Substitution->getIndex() + InsertOffset,
                    Value->begin(), Value->end());
      InsertOffset += Value->size();
    }

    // Match the newly constructed regex.
    RegExToMatch = TmpStr;
  }

  SmallVector<StringRef, 4> MatchInfo;
  unsigned int Flags = Regex::Newline;
  if (IgnoreCase)
    Flags |= Regex::IgnoreCase;
  if (!Regex(RegExToMatch, Flags).match(Buffer, &MatchInfo))
    return make_error<NotFoundError>();

  // Successful regex match.
  assert(!MatchInfo.empty() && "Didn't get any match");
  StringRef FullMatch = MatchInfo[0];

  // If this defines any string variables, remember their values.
  for (const auto &VariableDef : VariableDefs) {
    assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
    Context->GlobalVariableTable[VariableDef.first] =
        MatchInfo[VariableDef.second];
  }

  // If this defines any numeric variables, remember their values.
  for (const auto &NumericVariableDef : NumericVariableDefs) {
    const NumericVariableMatch &NumericVariableMatch =
        NumericVariableDef.getValue();
    unsigned CaptureParenGroup = NumericVariableMatch.CaptureParenGroup;
    assert(CaptureParenGroup < MatchInfo.size() && "Internal paren error");
    NumericVariable *DefinedNumericVariable =
        NumericVariableMatch.DefinedNumericVariable;

    // Convert the captured text to a value using the variable's own format.
    StringRef MatchedValue = MatchInfo[CaptureParenGroup];
    ExpressionFormat Format = DefinedNumericVariable->getImplicitFormat();
    Expected<ExpressionValue> Value =
        Format.valueFromStringRepr(MatchedValue, SM);
    if (!Value)
      return Value.takeError();
    DefinedNumericVariable->setValue(*Value);
  }

  // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
  // the required preceding newline, which is consumed by the pattern in the
  // case of CHECK-EMPTY but not CHECK-NEXT.
  size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
  MatchLen = FullMatch.size() - MatchStartSkip;
  return FullMatch.data() - Buffer.data() + MatchStartSkip;
}
1231
computeMatchDistance(StringRef Buffer) const1232 unsigned Pattern::computeMatchDistance(StringRef Buffer) const {
1233 // Just compute the number of matching characters. For regular expressions, we
1234 // just compare against the regex itself and hope for the best.
1235 //
1236 // FIXME: One easy improvement here is have the regex lib generate a single
1237 // example regular expression which matches, and use that as the example
1238 // string.
1239 StringRef ExampleString(FixedStr);
1240 if (ExampleString.empty())
1241 ExampleString = RegExStr;
1242
1243 // Only compare up to the first line in the buffer, or the string size.
1244 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
1245 BufferPrefix = BufferPrefix.split('\n').first;
1246 return BufferPrefix.edit_distance(ExampleString);
1247 }
1248
printSubstitutions(const SourceMgr & SM,StringRef Buffer,SMRange MatchRange) const1249 void Pattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
1250 SMRange MatchRange) const {
1251 // Print what we know about substitutions.
1252 if (!Substitutions.empty()) {
1253 for (const auto &Substitution : Substitutions) {
1254 SmallString<256> Msg;
1255 raw_svector_ostream OS(Msg);
1256 Expected<std::string> MatchedValue = Substitution->getResult();
1257
1258 // Substitution failed or is not known at match time, print the undefined
1259 // variables it uses.
1260 if (!MatchedValue) {
1261 bool UndefSeen = false;
1262 handleAllErrors(
1263 MatchedValue.takeError(), [](const NotFoundError &E) {},
1264 // Handled in PrintNoMatch().
1265 [](const ErrorDiagnostic &E) {},
1266 // Handled in match().
1267 [](const OverflowError &E) {},
1268 [&](const UndefVarError &E) {
1269 if (!UndefSeen) {
1270 OS << "uses undefined variable(s):";
1271 UndefSeen = true;
1272 }
1273 OS << " ";
1274 E.log(OS);
1275 });
1276 } else {
1277 // Substitution succeeded. Print substituted value.
1278 OS << "with \"";
1279 OS.write_escaped(Substitution->getFromString()) << "\" equal to \"";
1280 OS.write_escaped(*MatchedValue) << "\"";
1281 }
1282
1283 if (MatchRange.isValid())
1284 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
1285 {MatchRange});
1286 else
1287 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
1288 SourceMgr::DK_Note, OS.str());
1289 }
1290 }
1291 }
1292
ProcessMatchResult(FileCheckDiag::MatchType MatchTy,const SourceMgr & SM,SMLoc Loc,Check::FileCheckType CheckTy,StringRef Buffer,size_t Pos,size_t Len,std::vector<FileCheckDiag> * Diags,bool AdjustPrevDiag=false)1293 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
1294 const SourceMgr &SM, SMLoc Loc,
1295 Check::FileCheckType CheckTy,
1296 StringRef Buffer, size_t Pos, size_t Len,
1297 std::vector<FileCheckDiag> *Diags,
1298 bool AdjustPrevDiag = false) {
1299 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
1300 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
1301 SMRange Range(Start, End);
1302 if (Diags) {
1303 if (AdjustPrevDiag)
1304 Diags->rbegin()->MatchTy = MatchTy;
1305 else
1306 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
1307 }
1308 return Range;
1309 }
1310
printFuzzyMatch(const SourceMgr & SM,StringRef Buffer,std::vector<FileCheckDiag> * Diags) const1311 void Pattern::printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
1312 std::vector<FileCheckDiag> *Diags) const {
1313 // Attempt to find the closest/best fuzzy match. Usually an error happens
1314 // because some string in the output didn't exactly match. In these cases, we
1315 // would like to show the user a best guess at what "should have" matched, to
1316 // save them having to actually check the input manually.
1317 size_t NumLinesForward = 0;
1318 size_t Best = StringRef::npos;
1319 double BestQuality = 0;
1320
1321 // Use an arbitrary 4k limit on how far we will search.
1322 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
1323 if (Buffer[i] == '\n')
1324 ++NumLinesForward;
1325
1326 // Patterns have leading whitespace stripped, so skip whitespace when
1327 // looking for something which looks like a pattern.
1328 if (Buffer[i] == ' ' || Buffer[i] == '\t')
1329 continue;
1330
1331 // Compute the "quality" of this match as an arbitrary combination of the
1332 // match distance and the number of lines skipped to get to this match.
1333 unsigned Distance = computeMatchDistance(Buffer.substr(i));
1334 double Quality = Distance + (NumLinesForward / 100.);
1335
1336 if (Quality < BestQuality || Best == StringRef::npos) {
1337 Best = i;
1338 BestQuality = Quality;
1339 }
1340 }
1341
1342 // Print the "possible intended match here" line if we found something
1343 // reasonable and not equal to what we showed in the "scanning from here"
1344 // line.
1345 if (Best && Best != StringRef::npos && BestQuality < 50) {
1346 SMRange MatchRange =
1347 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(),
1348 getCheckTy(), Buffer, Best, 0, Diags);
1349 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note,
1350 "possible intended match here");
1351
1352 // FIXME: If we wanted to be really friendly we would show why the match
1353 // failed, as it can be hard to spot simple one character differences.
1354 }
1355 }
1356
1357 Expected<StringRef>
getPatternVarValue(StringRef VarName)1358 FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
1359 auto VarIter = GlobalVariableTable.find(VarName);
1360 if (VarIter == GlobalVariableTable.end())
1361 return make_error<UndefVarError>(VarName);
1362
1363 return VarIter->second;
1364 }
1365
1366 template <class... Types>
makeNumericVariable(Types...args)1367 NumericVariable *FileCheckPatternContext::makeNumericVariable(Types... args) {
1368 NumericVariables.push_back(std::make_unique<NumericVariable>(args...));
1369 return NumericVariables.back().get();
1370 }
1371
1372 Substitution *
makeStringSubstitution(StringRef VarName,size_t InsertIdx)1373 FileCheckPatternContext::makeStringSubstitution(StringRef VarName,
1374 size_t InsertIdx) {
1375 Substitutions.push_back(
1376 std::make_unique<StringSubstitution>(this, VarName, InsertIdx));
1377 return Substitutions.back().get();
1378 }
1379
makeNumericSubstitution(StringRef ExpressionStr,std::unique_ptr<Expression> Expression,size_t InsertIdx)1380 Substitution *FileCheckPatternContext::makeNumericSubstitution(
1381 StringRef ExpressionStr, std::unique_ptr<Expression> Expression,
1382 size_t InsertIdx) {
1383 Substitutions.push_back(std::make_unique<NumericSubstitution>(
1384 this, ExpressionStr, std::move(Expression), InsertIdx));
1385 return Substitutions.back().get();
1386 }
1387
FindRegexVarEnd(StringRef Str,SourceMgr & SM)1388 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
1389 // Offset keeps track of the current offset within the input Str
1390 size_t Offset = 0;
1391 // [...] Nesting depth
1392 size_t BracketDepth = 0;
1393
1394 while (!Str.empty()) {
1395 if (Str.startswith("]]") && BracketDepth == 0)
1396 return Offset;
1397 if (Str[0] == '\\') {
1398 // Backslash escapes the next char within regexes, so skip them both.
1399 Str = Str.substr(2);
1400 Offset += 2;
1401 } else {
1402 switch (Str[0]) {
1403 default:
1404 break;
1405 case '[':
1406 BracketDepth++;
1407 break;
1408 case ']':
1409 if (BracketDepth == 0) {
1410 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
1411 SourceMgr::DK_Error,
1412 "missing closing \"]\" for regex variable");
1413 exit(1);
1414 }
1415 BracketDepth--;
1416 break;
1417 }
1418 Str = Str.substr(1);
1419 Offset++;
1420 }
1421 }
1422
1423 return StringRef::npos;
1424 }
1425
CanonicalizeFile(MemoryBuffer & MB,SmallVectorImpl<char> & OutputBuffer)1426 StringRef FileCheck::CanonicalizeFile(MemoryBuffer &MB,
1427 SmallVectorImpl<char> &OutputBuffer) {
1428 OutputBuffer.reserve(MB.getBufferSize());
1429
1430 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
1431 Ptr != End; ++Ptr) {
1432 // Eliminate trailing dosish \r.
1433 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
1434 continue;
1435 }
1436
1437 // If current char is not a horizontal whitespace or if horizontal
1438 // whitespace canonicalization is disabled, dump it to output as is.
1439 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
1440 OutputBuffer.push_back(*Ptr);
1441 continue;
1442 }
1443
1444 // Otherwise, add one space and advance over neighboring space.
1445 OutputBuffer.push_back(' ');
1446 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
1447 ++Ptr;
1448 }
1449
1450 // Add a null byte and then return all but that byte.
1451 OutputBuffer.push_back('\0');
1452 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
1453 }
1454
FileCheckDiag(const SourceMgr & SM,const Check::FileCheckType & CheckTy,SMLoc CheckLoc,MatchType MatchTy,SMRange InputRange)1455 FileCheckDiag::FileCheckDiag(const SourceMgr &SM,
1456 const Check::FileCheckType &CheckTy,
1457 SMLoc CheckLoc, MatchType MatchTy,
1458 SMRange InputRange)
1459 : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy) {
1460 auto Start = SM.getLineAndColumn(InputRange.Start);
1461 auto End = SM.getLineAndColumn(InputRange.End);
1462 InputStartLine = Start.first;
1463 InputStartCol = Start.second;
1464 InputEndLine = End.first;
1465 InputEndCol = End.second;
1466 }
1467
IsPartOfWord(char c)1468 static bool IsPartOfWord(char c) {
1469 return (isAlnum(c) || c == '-' || c == '_');
1470 }
1471
setCount(int C)1472 Check::FileCheckType &Check::FileCheckType::setCount(int C) {
1473 assert(Count > 0 && "zero and negative counts are not supported");
1474 assert((C == 1 || Kind == CheckPlain) &&
1475 "count supported only for plain CHECK directives");
1476 Count = C;
1477 return *this;
1478 }
1479
getDescription(StringRef Prefix) const1480 std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
1481 switch (Kind) {
1482 case Check::CheckNone:
1483 return "invalid";
1484 case Check::CheckPlain:
1485 if (Count > 1)
1486 return Prefix.str() + "-COUNT";
1487 return std::string(Prefix);
1488 case Check::CheckNext:
1489 return Prefix.str() + "-NEXT";
1490 case Check::CheckSame:
1491 return Prefix.str() + "-SAME";
1492 case Check::CheckNot:
1493 return Prefix.str() + "-NOT";
1494 case Check::CheckDAG:
1495 return Prefix.str() + "-DAG";
1496 case Check::CheckLabel:
1497 return Prefix.str() + "-LABEL";
1498 case Check::CheckEmpty:
1499 return Prefix.str() + "-EMPTY";
1500 case Check::CheckComment:
1501 return std::string(Prefix);
1502 case Check::CheckEOF:
1503 return "implicit EOF";
1504 case Check::CheckBadNot:
1505 return "bad NOT";
1506 case Check::CheckBadCount:
1507 return "bad COUNT";
1508 }
1509 llvm_unreachable("unknown FileCheckType");
1510 }
1511
1512 static std::pair<Check::FileCheckType, StringRef>
FindCheckType(const FileCheckRequest & Req,StringRef Buffer,StringRef Prefix)1513 FindCheckType(const FileCheckRequest &Req, StringRef Buffer, StringRef Prefix) {
1514 if (Buffer.size() <= Prefix.size())
1515 return {Check::CheckNone, StringRef()};
1516
1517 char NextChar = Buffer[Prefix.size()];
1518
1519 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
1520
1521 // Check for comment.
1522 if (Req.CommentPrefixes.end() != std::find(Req.CommentPrefixes.begin(),
1523 Req.CommentPrefixes.end(),
1524 Prefix)) {
1525 if (NextChar == ':')
1526 return {Check::CheckComment, Rest};
1527 // Ignore a comment prefix if it has a suffix like "-NOT".
1528 return {Check::CheckNone, StringRef()};
1529 }
1530
1531 // Verify that the : is present after the prefix.
1532 if (NextChar == ':')
1533 return {Check::CheckPlain, Rest};
1534
1535 if (NextChar != '-')
1536 return {Check::CheckNone, StringRef()};
1537
1538 if (Rest.consume_front("COUNT-")) {
1539 int64_t Count;
1540 if (Rest.consumeInteger(10, Count))
1541 // Error happened in parsing integer.
1542 return {Check::CheckBadCount, Rest};
1543 if (Count <= 0 || Count > INT32_MAX)
1544 return {Check::CheckBadCount, Rest};
1545 if (!Rest.consume_front(":"))
1546 return {Check::CheckBadCount, Rest};
1547 return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest};
1548 }
1549
1550 if (Rest.consume_front("NEXT:"))
1551 return {Check::CheckNext, Rest};
1552
1553 if (Rest.consume_front("SAME:"))
1554 return {Check::CheckSame, Rest};
1555
1556 if (Rest.consume_front("NOT:"))
1557 return {Check::CheckNot, Rest};
1558
1559 if (Rest.consume_front("DAG:"))
1560 return {Check::CheckDAG, Rest};
1561
1562 if (Rest.consume_front("LABEL:"))
1563 return {Check::CheckLabel, Rest};
1564
1565 if (Rest.consume_front("EMPTY:"))
1566 return {Check::CheckEmpty, Rest};
1567
1568 // You can't combine -NOT with another suffix.
1569 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
1570 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
1571 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
1572 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
1573 return {Check::CheckBadNot, Rest};
1574
1575 return {Check::CheckNone, Rest};
1576 }
1577
1578 // From the given position, find the next character after the word.
SkipWord(StringRef Str,size_t Loc)1579 static size_t SkipWord(StringRef Str, size_t Loc) {
1580 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
1581 ++Loc;
1582 return Loc;
1583 }
1584
1585 /// Searches the buffer for the first prefix in the prefix regular expression.
1586 ///
1587 /// This searches the buffer using the provided regular expression, however it
1588 /// enforces constraints beyond that:
1589 /// 1) The found prefix must not be a suffix of something that looks like
1590 /// a valid prefix.
1591 /// 2) The found prefix must be followed by a valid check type suffix using \c
1592 /// FindCheckType above.
1593 ///
1594 /// \returns a pair of StringRefs into the Buffer, which combines:
1595 /// - the first match of the regular expression to satisfy these two is
1596 /// returned,
1597 /// otherwise an empty StringRef is returned to indicate failure.
1598 /// - buffer rewound to the location right after parsed suffix, for parsing
1599 /// to continue from
1600 ///
1601 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
1602 /// start at the beginning of the returned prefix, increment \p LineNumber for
1603 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
1604 /// check found by examining the suffix.
1605 ///
1606 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
1607 /// is unspecified.
1608 static std::pair<StringRef, StringRef>
FindFirstMatchingPrefix(const FileCheckRequest & Req,Regex & PrefixRE,StringRef & Buffer,unsigned & LineNumber,Check::FileCheckType & CheckTy)1609 FindFirstMatchingPrefix(const FileCheckRequest &Req, Regex &PrefixRE,
1610 StringRef &Buffer, unsigned &LineNumber,
1611 Check::FileCheckType &CheckTy) {
1612 SmallVector<StringRef, 2> Matches;
1613
1614 while (!Buffer.empty()) {
1615 // Find the first (longest) match using the RE.
1616 if (!PrefixRE.match(Buffer, &Matches))
1617 // No match at all, bail.
1618 return {StringRef(), StringRef()};
1619
1620 StringRef Prefix = Matches[0];
1621 Matches.clear();
1622
1623 assert(Prefix.data() >= Buffer.data() &&
1624 Prefix.data() < Buffer.data() + Buffer.size() &&
1625 "Prefix doesn't start inside of buffer!");
1626 size_t Loc = Prefix.data() - Buffer.data();
1627 StringRef Skipped = Buffer.substr(0, Loc);
1628 Buffer = Buffer.drop_front(Loc);
1629 LineNumber += Skipped.count('\n');
1630
1631 // Check that the matched prefix isn't a suffix of some other check-like
1632 // word.
1633 // FIXME: This is a very ad-hoc check. it would be better handled in some
1634 // other way. Among other things it seems hard to distinguish between
1635 // intentional and unintentional uses of this feature.
1636 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
1637 // Now extract the type.
1638 StringRef AfterSuffix;
1639 std::tie(CheckTy, AfterSuffix) = FindCheckType(Req, Buffer, Prefix);
1640
1641 // If we've found a valid check type for this prefix, we're done.
1642 if (CheckTy != Check::CheckNone)
1643 return {Prefix, AfterSuffix};
1644 }
1645
1646 // If we didn't successfully find a prefix, we need to skip this invalid
1647 // prefix and continue scanning. We directly skip the prefix that was
1648 // matched and any additional parts of that check-like word.
1649 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
1650 }
1651
1652 // We ran out of buffer while skipping partial matches so give up.
1653 return {StringRef(), StringRef()};
1654 }
1655
createLineVariable()1656 void FileCheckPatternContext::createLineVariable() {
1657 assert(!LineVariable && "@LINE pseudo numeric variable already created");
1658 StringRef LineName = "@LINE";
1659 LineVariable = makeNumericVariable(
1660 LineName, ExpressionFormat(ExpressionFormat::Kind::Unsigned));
1661 GlobalNumericVariableTable[LineName] = LineVariable;
1662 }
1663
FileCheck(FileCheckRequest Req)1664 FileCheck::FileCheck(FileCheckRequest Req)
1665 : Req(Req), PatternContext(std::make_unique<FileCheckPatternContext>()),
1666 CheckStrings(std::make_unique<std::vector<FileCheckString>>()) {}
1667
1668 FileCheck::~FileCheck() = default;
1669
/// Scans \p Buffer for check directives and records them in CheckStrings.
///
/// Command-line variable definitions from the request are installed first,
/// then the "@LINE" variable is created, then one CHECK-NOT pattern is built
/// per implicit-check-not string of the request. Afterwards \p Buffer is
/// walked prefix by prefix (as located through \p PrefixRE); every directive
/// found is parsed into a Pattern. CHECK-DAG and CHECK-NOT patterns are
/// accumulated and attached to the next other directive, or to a synthetic
/// EOF check string if none follows.
///
/// \param SM source manager used for diagnostics; it also receives one fake
///        "command line" buffer per implicit pattern.
/// \param Buffer contents to scan for directives.
/// \param PrefixRE regex used to locate candidate prefixes.
/// \param ImpPatBufferIDRange if non-null, receives the half-open range of
///        source buffer IDs created for implicit patterns; both members are 0
///        when there are no implicit patterns.
/// \returns true if an error was reported, false otherwise.
bool FileCheck::readCheckFile(
    SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
    std::pair<unsigned, unsigned> *ImpPatBufferIDRange) {
  if (ImpPatBufferIDRange)
    ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0;

  Error DefineError =
      PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM);
  if (DefineError) {
    logAllUnhandledErrors(std::move(DefineError), errs());
    return true;
  }

  PatternContext->createLineVariable();

  std::vector<Pattern> ImplicitNegativeChecks;
  for (StringRef PatternString : Req.ImplicitCheckNot) {
    // Create a buffer with fake command line content in order to display the
    // command line option responsible for the specific implicit CHECK-NOT.
    std::string Prefix = "-implicit-check-not='";
    std::string Suffix = "'";
    std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
        (Prefix + PatternString + Suffix).str(), "command line");

    // The pattern text as it sits inside the fake buffer, so that diagnostic
    // locations point into that buffer.
    StringRef PatternInBuffer =
        CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
    unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
    if (ImpPatBufferIDRange) {
      if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) {
        // First implicit pattern: start a one-element range.
        ImpPatBufferIDRange->first = BufferID;
        ImpPatBufferIDRange->second = BufferID + 1;
      } else {
        assert(BufferID == ImpPatBufferIDRange->second &&
               "expected consecutive source buffer IDs");
        ++ImpPatBufferIDRange->second;
      }
    }

    ImplicitNegativeChecks.push_back(
        Pattern(Check::CheckNot, PatternContext.get()));
    // NOTE(review): the result of parsePattern is ignored here, whereas the
    // directive loop below returns true when parsePattern returns true.
    // Confirm whether a failure to parse an implicit pattern should abort.
    ImplicitNegativeChecks.back().parsePattern(PatternInBuffer,
                                               "IMPLICIT-CHECK", SM, Req);
  }

  // CHECK-DAG/-NOT patterns collected since the last check string; seeded
  // with a copy of the implicit negative checks.
  std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;

  // LineNumber keeps track of the line on which CheckPrefix instances are
  // found.
  unsigned LineNumber = 1;

  bool FoundUsedCheckPrefix = false;
  while (1) {
    Check::FileCheckType CheckTy;

    // See if a prefix occurs in the memory buffer.
    StringRef UsedPrefix;
    StringRef AfterSuffix;
    std::tie(UsedPrefix, AfterSuffix) =
        FindFirstMatchingPrefix(Req, PrefixRE, Buffer, LineNumber, CheckTy);
    if (UsedPrefix.empty())
      break;
    // Comment directives do not count as a use of a check prefix.
    if (CheckTy != Check::CheckComment)
      FoundUsedCheckPrefix = true;

    assert(UsedPrefix.data() == Buffer.data() &&
           "Failed to move Buffer's start forward, or pointed prefix outside "
           "of the buffer!");
    assert(AfterSuffix.data() >= Buffer.data() &&
           AfterSuffix.data() < Buffer.data() + Buffer.size() &&
           "Parsing after suffix doesn't start inside of buffer!");

    // Location to use for error messages.
    const char *UsedPrefixStart = UsedPrefix.data();

    // Skip the buffer to the end of parsed suffix (or just prefix, if no good
    // suffix was processed).
    Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
                                 : AfterSuffix;

    // Complain about useful-looking but unsupported suffixes.
    if (CheckTy == Check::CheckBadNot) {
      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
                      "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
      return true;
    }

    // Complain about invalid count specification.
    if (CheckTy == Check::CheckBadCount) {
      SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
                      "invalid count in -COUNT specification on prefix '" +
                          UsedPrefix + "'");
      return true;
    }

    // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
    // leading whitespace. The whitespace is kept only when both
    // NoCanonicalizeWhiteSpace and MatchFullLines are set.
    if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
      Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));

    // Scan ahead to the end of line.
    size_t EOL = Buffer.find_first_of("\n\r");

    // Remember the location of the start of the pattern, for diagnostics.
    SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());

    // Extract the pattern from the buffer.
    StringRef PatternBuffer = Buffer.substr(0, EOL);
    Buffer = Buffer.substr(EOL);

    // If this is a comment, we're done.
    if (CheckTy == Check::CheckComment)
      continue;

    // Parse the pattern.
    Pattern P(CheckTy, PatternContext.get(), LineNumber);
    if (P.parsePattern(PatternBuffer, UsedPrefix, SM, Req))
      return true;

    // Verify that CHECK-LABEL lines do not define or use variables
    if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
      SM.PrintMessage(
          SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
          "found '" + UsedPrefix + "-LABEL:'"
                                   " with variable definition or use");
      return true;
    }

    // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
    if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
         CheckTy == Check::CheckEmpty) &&
        CheckStrings->empty()) {
      StringRef Type = CheckTy == Check::CheckNext
                           ? "NEXT"
                           : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
      // NOTE(review): the second quoted name in this message has no closing
      // quote after the colon; confirm before changing, since the text is
      // emitted verbatim.
      SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
                      SourceMgr::DK_Error,
                      "found '" + UsedPrefix + "-" + Type +
                          "' without previous '" + UsedPrefix + ": line");
      return true;
    }

    // Handle CHECK-DAG/-NOT.
    if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
      DagNotMatches.push_back(P);
      continue;
    }

    // Okay, add the string we captured to the output vector and move on.
    // The collected DAG/NOT patterns are handed to the new check string and
    // the collection restarts from the implicit negative checks.
    CheckStrings->emplace_back(P, UsedPrefix, PatternLoc);
    std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
    DagNotMatches = ImplicitNegativeChecks;
  }

  // When there are no used prefixes we report an error except in the case that
  // no prefix is specified explicitly but -implicit-check-not is specified.
  if (!FoundUsedCheckPrefix &&
      (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) {
    errs() << "error: no check strings found with prefix"
           << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
    for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
      if (I != 0)
        errs() << ", ";
      errs() << "\'" << Req.CheckPrefixes[I] << ":'";
    }
    errs() << '\n';
    return true;
  }

  // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs,
  // and use the first prefix as a filler for the error message.
  if (!DagNotMatches.empty()) {
    CheckStrings->emplace_back(
        Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1),
        *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
    std::swap(DagNotMatches, CheckStrings->back().DagNotStrings);
  }

  return false;
}
1849
PrintMatch(bool ExpectedMatch,const SourceMgr & SM,StringRef Prefix,SMLoc Loc,const Pattern & Pat,int MatchedCount,StringRef Buffer,size_t MatchPos,size_t MatchLen,const FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags)1850 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1851 StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1852 int MatchedCount, StringRef Buffer, size_t MatchPos,
1853 size_t MatchLen, const FileCheckRequest &Req,
1854 std::vector<FileCheckDiag> *Diags) {
1855 bool PrintDiag = true;
1856 if (ExpectedMatch) {
1857 if (!Req.Verbose)
1858 return;
1859 if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
1860 return;
1861 // Due to their verbosity, we don't print verbose diagnostics here if we're
1862 // gathering them for a different rendering, but we always print other
1863 // diagnostics.
1864 PrintDiag = !Diags;
1865 }
1866 SMRange MatchRange = ProcessMatchResult(
1867 ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
1868 : FileCheckDiag::MatchFoundButExcluded,
1869 SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
1870 if (!PrintDiag)
1871 return;
1872
1873 std::string Message = formatv("{0}: {1} string found in input",
1874 Pat.getCheckTy().getDescription(Prefix),
1875 (ExpectedMatch ? "expected" : "excluded"))
1876 .str();
1877 if (Pat.getCount() > 1)
1878 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
1879
1880 SM.PrintMessage(
1881 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
1882 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
1883 {MatchRange});
1884 Pat.printSubstitutions(SM, Buffer, MatchRange);
1885 }
1886
/// Convenience overload of PrintMatch that takes the prefix, location and
/// pattern from \p CheckStr and forwards everything else unchanged.
static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
                       const FileCheckString &CheckStr, int MatchedCount,
                       StringRef Buffer, size_t MatchPos, size_t MatchLen,
                       FileCheckRequest &Req,
                       std::vector<FileCheckDiag> *Diags) {
  PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
             MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags);
}
1895
PrintNoMatch(bool ExpectedMatch,const SourceMgr & SM,StringRef Prefix,SMLoc Loc,const Pattern & Pat,int MatchedCount,StringRef Buffer,bool VerboseVerbose,std::vector<FileCheckDiag> * Diags,Error MatchErrors)1896 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1897 StringRef Prefix, SMLoc Loc, const Pattern &Pat,
1898 int MatchedCount, StringRef Buffer,
1899 bool VerboseVerbose, std::vector<FileCheckDiag> *Diags,
1900 Error MatchErrors) {
1901 assert(MatchErrors && "Called on successful match");
1902 bool PrintDiag = true;
1903 if (!ExpectedMatch) {
1904 if (!VerboseVerbose) {
1905 consumeError(std::move(MatchErrors));
1906 return;
1907 }
1908 // Due to their verbosity, we don't print verbose diagnostics here if we're
1909 // gathering them for a different rendering, but we always print other
1910 // diagnostics.
1911 PrintDiag = !Diags;
1912 }
1913
1914 // If the current position is at the end of a line, advance to the start of
1915 // the next line.
1916 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1917 SMRange SearchRange = ProcessMatchResult(
1918 ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
1919 : FileCheckDiag::MatchNoneAndExcluded,
1920 SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
1921 if (!PrintDiag) {
1922 consumeError(std::move(MatchErrors));
1923 return;
1924 }
1925
1926 MatchErrors = handleErrors(std::move(MatchErrors),
1927 [](const ErrorDiagnostic &E) { E.log(errs()); });
1928
1929 // No problem matching the string per se.
1930 if (!MatchErrors)
1931 return;
1932 consumeError(std::move(MatchErrors));
1933
1934 // Print "not found" diagnostic.
1935 std::string Message = formatv("{0}: {1} string not found in input",
1936 Pat.getCheckTy().getDescription(Prefix),
1937 (ExpectedMatch ? "expected" : "excluded"))
1938 .str();
1939 if (Pat.getCount() > 1)
1940 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
1941 SM.PrintMessage(
1942 Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);
1943
1944 // Print the "scanning from here" line.
1945 SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
1946
1947 // Allow the pattern to print additional information if desired.
1948 Pat.printSubstitutions(SM, Buffer);
1949
1950 if (ExpectedMatch)
1951 Pat.printFuzzyMatch(SM, Buffer, Diags);
1952 }
1953
/// Convenience overload of PrintNoMatch that takes the prefix, location and
/// pattern from \p CheckStr and forwards everything else unchanged.
/// Ownership of \p MatchErrors is passed on to the callee.
static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
                         const FileCheckString &CheckStr, int MatchedCount,
                         StringRef Buffer, bool VerboseVerbose,
                         std::vector<FileCheckDiag> *Diags, Error MatchErrors) {
  PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
               MatchedCount, Buffer, VerboseVerbose, Diags,
               std::move(MatchErrors));
}
1962
1963 /// Counts the number of newlines in the specified range.
CountNumNewlinesBetween(StringRef Range,const char * & FirstNewLine)1964 static unsigned CountNumNewlinesBetween(StringRef Range,
1965 const char *&FirstNewLine) {
1966 unsigned NumNewLines = 0;
1967 while (1) {
1968 // Scan for newline.
1969 Range = Range.substr(Range.find_first_of("\n\r"));
1970 if (Range.empty())
1971 return NumNewLines;
1972
1973 ++NumNewLines;
1974
1975 // Handle \n\r and \r\n as a single newline.
1976 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1977 (Range[0] != Range[1]))
1978 Range = Range.substr(1);
1979 Range = Range.substr(1);
1980
1981 if (NumNewLines == 1)
1982 FirstNewLine = Range.begin();
1983 }
1984 }
1985
/// Matches this check string against \p Buffer.
///
/// Outside of label-scan mode the attached CHECK-DAG/CHECK-NOT patterns are
/// processed first via CheckDag. The pattern itself is then matched
/// Pat.getCount() times in a row, each attempt starting where the previous
/// match ended. Outside of label-scan mode the CHECK-NEXT/-EMPTY and
/// CHECK-SAME line constraints and the pending CHECK-NOT patterns are finally
/// verified against the region skipped before the first match.
///
/// \param SM source manager used for diagnostics.
/// \param Buffer input text to match against.
/// \param IsLabelScanMode when true, only the pattern itself is matched.
/// \param MatchLen on success, set to the distance from the start of the
///        first match to the end of the last match.
/// \param Req the FileCheck request (verbosity and other options).
/// \param Diags optional collection that diagnostics are forwarded to.
/// \returns the offset of the first match in \p Buffer, or StringRef::npos on
///          failure.
size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
                              bool IsLabelScanMode, size_t &MatchLen,
                              FileCheckRequest &Req,
                              std::vector<FileCheckDiag> *Diags) const {
  size_t LastPos = 0;
  std::vector<const Pattern *> NotStrings;

  // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
  // bounds; we have not processed variable definitions within the bounded block
  // yet so cannot handle any final CHECK-DAG yet; this is handled when going
  // over the block again (including the last CHECK-LABEL) in normal mode.
  if (!IsLabelScanMode) {
    // Match "dag strings" (with mixed "not strings" if any).
    LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags);
    if (LastPos == StringRef::npos)
      return StringRef::npos;
  }

  // Match itself from the last position after matching CHECK-DAG.
  size_t LastMatchEnd = LastPos;
  size_t FirstMatchPos = 0;
  // Go match the pattern Count times. Majority of patterns only match with
  // count 1 though.
  assert(Pat.getCount() != 0 && "pattern count can not be zero");
  for (int i = 1; i <= Pat.getCount(); i++) {
    StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
    size_t CurrentMatchLen;
    // Get a match at the current start point.
    Expected<size_t> MatchResult = Pat.match(MatchBuffer, CurrentMatchLen, SM);

    // Report the outcome of this attempt.
    if (!MatchResult) {
      PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags,
                   MatchResult.takeError());
      return StringRef::npos;
    }
    // MatchPos is relative to MatchBuffer, i.e. to LastMatchEnd.
    size_t MatchPos = *MatchResult;
    PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req,
               Diags);
    // On the first iteration LastMatchEnd still equals LastPos, so this is
    // the first match's offset within Buffer.
    if (i == 1)
      FirstMatchPos = LastPos + MatchPos;

    // Move the start point after the match.
    LastMatchEnd += MatchPos + CurrentMatchLen;
  }
  // Full match len counts from first match pos.
  MatchLen = LastMatchEnd - FirstMatchPos;

  // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
  // or CHECK-NOT
  if (!IsLabelScanMode) {
    // Re-express the first match relative to LastPos; SkippedRegion is the
    // text between LastPos and the start of the first match.
    size_t MatchPos = FirstMatchPos - LastPos;
    StringRef MatchBuffer = Buffer.substr(LastPos);
    StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);

    // If this check is a "CHECK-NEXT", verify that the previous match was on
    // the previous line (i.e. that there is one newline between them).
    if (CheckNext(SM, SkippedRegion)) {
      ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
                         Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
                         Diags, Req.Verbose);
      return StringRef::npos;
    }

    // If this check is a "CHECK-SAME", verify that the previous match was on
    // the same line (i.e. that there is no newline between them).
    if (CheckSame(SM, SkippedRegion)) {
      ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
                         Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
                         Diags, Req.Verbose);
      return StringRef::npos;
    }

    // If this match had "not strings", verify that they don't exist in the
    // skipped region.
    if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
      return StringRef::npos;
  }

  return FirstMatchPos;
}
2067
CheckNext(const SourceMgr & SM,StringRef Buffer) const2068 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
2069 if (Pat.getCheckTy() != Check::CheckNext &&
2070 Pat.getCheckTy() != Check::CheckEmpty)
2071 return false;
2072
2073 Twine CheckName =
2074 Prefix +
2075 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
2076
2077 // Count the number of newlines between the previous match and this one.
2078 const char *FirstNewLine = nullptr;
2079 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
2080
2081 if (NumNewLines == 0) {
2082 SM.PrintMessage(Loc, SourceMgr::DK_Error,
2083 CheckName + ": is on the same line as previous match");
2084 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
2085 "'next' match was here");
2086 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
2087 "previous match ended here");
2088 return true;
2089 }
2090
2091 if (NumNewLines != 1) {
2092 SM.PrintMessage(Loc, SourceMgr::DK_Error,
2093 CheckName +
2094 ": is not on the line after the previous match");
2095 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
2096 "'next' match was here");
2097 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
2098 "previous match ended here");
2099 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
2100 "non-matching line after previous match is here");
2101 return true;
2102 }
2103
2104 return false;
2105 }
2106
CheckSame(const SourceMgr & SM,StringRef Buffer) const2107 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
2108 if (Pat.getCheckTy() != Check::CheckSame)
2109 return false;
2110
2111 // Count the number of newlines between the previous match and this one.
2112 const char *FirstNewLine = nullptr;
2113 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
2114
2115 if (NumNewLines != 0) {
2116 SM.PrintMessage(Loc, SourceMgr::DK_Error,
2117 Prefix +
2118 "-SAME: is not on the same line as the previous match");
2119 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
2120 "'next' match was here");
2121 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
2122 "previous match ended here");
2123 return true;
2124 }
2125
2126 return false;
2127 }
2128
CheckNot(const SourceMgr & SM,StringRef Buffer,const std::vector<const Pattern * > & NotStrings,const FileCheckRequest & Req,std::vector<FileCheckDiag> * Diags) const2129 bool FileCheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
2130 const std::vector<const Pattern *> &NotStrings,
2131 const FileCheckRequest &Req,
2132 std::vector<FileCheckDiag> *Diags) const {
2133 for (const Pattern *Pat : NotStrings) {
2134 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
2135
2136 size_t MatchLen = 0;
2137 Expected<size_t> MatchResult = Pat->match(Buffer, MatchLen, SM);
2138
2139 if (!MatchResult) {
2140 PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
2141 Req.VerboseVerbose, Diags, MatchResult.takeError());
2142 continue;
2143 }
2144 size_t Pos = *MatchResult;
2145
2146 PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen,
2147 Req, Diags);
2148
2149 return true;
2150 }
2151
2152 return false;
2153 }
2154
/// Matches the CHECK-DAG patterns attached to this check string against
/// \p Buffer and collects the attached CHECK-NOT patterns.
///
/// DagNotStrings is walked in order. CHECK-NOT patterns are appended to
/// \p NotStrings. Each CHECK-DAG pattern is searched from the start of the
/// current search range; unless Req.AllowDeprecatedDagOverlap is set, a match
/// overlapping an earlier match of the same group is discarded and the search
/// resumes after that earlier match. When a group of consecutive CHECK-DAGs
/// ends, the pending \p NotStrings are verified against the text between the
/// start of the search range and the group's earliest match and are then
/// cleared, and the search range is moved to the end of the group's matches.
///
/// \param SM source manager used for diagnostics.
/// \param Buffer input text to match against.
/// \param NotStrings receives the CHECK-NOT patterns that are still pending
///        when the function returns successfully.
/// \param Req the FileCheck request (verbosity and overlap options).
/// \param Diags optional collection that diagnostics are forwarded to.
/// \returns the offset in \p Buffer where the search range starts after the
///          last CHECK-DAG group (0 if there is none), or StringRef::npos on
///          failure.
size_t FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
                                 std::vector<const Pattern *> &NotStrings,
                                 const FileCheckRequest &Req,
                                 std::vector<FileCheckDiag> *Diags) const {
  if (DagNotStrings.empty())
    return 0;

  // The start of the search range.
  size_t StartPos = 0;

  // Half-open [Pos, End) span of a match, as offsets into Buffer.
  struct MatchRange {
    size_t Pos;
    size_t End;
  };
  // A sorted list of ranges for non-overlapping CHECK-DAG matches.  Match
  // ranges are erased from this list once they are no longer in the search
  // range.
  std::list<MatchRange> MatchRanges;

  // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
  // group, so we don't use a range-based for loop here.
  for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
       PatItr != PatEnd; ++PatItr) {
    const Pattern &Pat = *PatItr;
    assert((Pat.getCheckTy() == Check::CheckDAG ||
            Pat.getCheckTy() == Check::CheckNot) &&
           "Invalid CHECK-DAG or CHECK-NOT!");

    // CHECK-NOT patterns are only collected here; they are verified when the
    // next CHECK-DAG group ends, or by the caller.
    if (Pat.getCheckTy() == Check::CheckNot) {
      NotStrings.push_back(&Pat);
      continue;
    }

    assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");

    // CHECK-DAG always matches from the start.
    size_t MatchLen = 0, MatchPos = StartPos;

    // Search for a match that doesn't overlap a previous match in this
    // CHECK-DAG group.
    for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
      StringRef MatchBuffer = Buffer.substr(MatchPos);
      Expected<size_t> MatchResult = Pat.match(MatchBuffer, MatchLen, SM);
      // With a group of CHECK-DAGs, a single mismatching means the match on
      // that group of CHECK-DAGs fails immediately.
      if (!MatchResult) {
        PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
                     Req.VerboseVerbose, Diags, MatchResult.takeError());
        return StringRef::npos;
      }
      size_t MatchPosBuf = *MatchResult;
      // Re-calc it as the offset relative to the start of the original string.
      MatchPos += MatchPosBuf;
      // In doubly-verbose mode every candidate match is reported, including
      // ones that are discarded below.
      if (Req.VerboseVerbose)
        PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
                   MatchLen, Req, Diags);
      MatchRange M{MatchPos, MatchPos + MatchLen};
      if (Req.AllowDeprecatedDagOverlap) {
        // We don't need to track all matches in this mode, so we just maintain
        // one match range that encompasses the current CHECK-DAG group's
        // matches.
        if (MatchRanges.empty())
          MatchRanges.insert(MatchRanges.end(), M);
        else {
          auto Block = MatchRanges.begin();
          Block->Pos = std::min(Block->Pos, M.Pos);
          Block->End = std::max(Block->End, M.End);
        }
        break;
      }
      // Iterate previous matches until overlapping match or insertion point.
      bool Overlap = false;
      for (; MI != ME; ++MI) {
        if (M.Pos < MI->End) {
          // !Overlap => New match has no overlap and is before this old match.
          // Overlap => New match overlaps this old match.
          Overlap = MI->Pos < M.End;
          break;
        }
      }
      if (!Overlap) {
        // Insert non-overlapping match into list.
        MatchRanges.insert(MI, M);
        break;
      }
      if (Req.VerboseVerbose) {
        // Due to their verbosity, we don't print verbose diagnostics here if
        // we're gathering them for a different rendering, but we always print
        // other diagnostics.
        if (!Diags) {
          SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
          SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
          SMRange OldRange(OldStart, OldEnd);
          SM.PrintMessage(OldStart, SourceMgr::DK_Note,
                          "match discarded, overlaps earlier DAG match here",
                          {OldRange});
        } else
          // Re-label the most recently recorded diagnostic as discarded.
          Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
      }
      // Retry from the end of the old match that was overlapped.
      MatchPos = MI->End;
    }
    // Outside of doubly-verbose mode only the accepted match is reported.
    if (!Req.VerboseVerbose)
      PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
                 MatchLen, Req, Diags);

    // Handle the end of a CHECK-DAG group.
    if (std::next(PatItr) == PatEnd ||
        std::next(PatItr)->getCheckTy() == Check::CheckNot) {
      if (!NotStrings.empty()) {
        // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
        // CHECK-DAG, verify that there are no 'not' strings occurred in that
        // region.
        StringRef SkippedRegion =
            Buffer.slice(StartPos, MatchRanges.begin()->Pos);
        if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
          return StringRef::npos;
        // Clear "not strings".
        NotStrings.clear();
      }
      // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
      // end of this CHECK-DAG group's match range.
      StartPos = MatchRanges.rbegin()->End;
      // Don't waste time checking for (impossible) overlaps before that.
      MatchRanges.clear();
    }
  }

  return StartPos;
}
2284
ValidatePrefixes(StringRef Kind,StringSet<> & UniquePrefixes,ArrayRef<StringRef> SuppliedPrefixes)2285 static bool ValidatePrefixes(StringRef Kind, StringSet<> &UniquePrefixes,
2286 ArrayRef<StringRef> SuppliedPrefixes) {
2287 for (StringRef Prefix : SuppliedPrefixes) {
2288 if (Prefix.empty()) {
2289 errs() << "error: supplied " << Kind << " prefix must not be the empty "
2290 << "string\n";
2291 return false;
2292 }
2293 static const Regex Validator("^[a-zA-Z0-9_-]*$");
2294 if (!Validator.match(Prefix)) {
2295 errs() << "error: supplied " << Kind << " prefix must start with a "
2296 << "letter and contain only alphanumeric characters, hyphens, and "
2297 << "underscores: '" << Prefix << "'\n";
2298 return false;
2299 }
2300 if (!UniquePrefixes.insert(Prefix).second) {
2301 errs() << "error: supplied " << Kind << " prefix must be unique among "
2302 << "check and comment prefixes: '" << Prefix << "'\n";
2303 return false;
2304 }
2305 }
2306 return true;
2307 }
2308
// Check prefixes used when the request does not supply any.
static const char *DefaultCheckPrefixes[] = {"CHECK"};
// Comment prefixes used when the request does not supply any.
static const char *DefaultCommentPrefixes[] = {"COM", "RUN"};
2311
ValidateCheckPrefixes()2312 bool FileCheck::ValidateCheckPrefixes() {
2313 StringSet<> UniquePrefixes;
2314 // Add default prefixes to catch user-supplied duplicates of them below.
2315 if (Req.CheckPrefixes.empty()) {
2316 for (const char *Prefix : DefaultCheckPrefixes)
2317 UniquePrefixes.insert(Prefix);
2318 }
2319 if (Req.CommentPrefixes.empty()) {
2320 for (const char *Prefix : DefaultCommentPrefixes)
2321 UniquePrefixes.insert(Prefix);
2322 }
2323 // Do not validate the default prefixes, or diagnostics about duplicates might
2324 // incorrectly indicate that they were supplied by the user.
2325 if (!ValidatePrefixes("check", UniquePrefixes, Req.CheckPrefixes))
2326 return false;
2327 if (!ValidatePrefixes("comment", UniquePrefixes, Req.CommentPrefixes))
2328 return false;
2329 return true;
2330 }
2331
buildCheckPrefixRegex()2332 Regex FileCheck::buildCheckPrefixRegex() {
2333 if (Req.CheckPrefixes.empty()) {
2334 for (const char *Prefix : DefaultCheckPrefixes)
2335 Req.CheckPrefixes.push_back(Prefix);
2336 Req.IsDefaultCheckPrefix = true;
2337 }
2338 if (Req.CommentPrefixes.empty()) {
2339 for (const char *Prefix : DefaultCommentPrefixes)
2340 Req.CommentPrefixes.push_back(Prefix);
2341 }
2342
2343 // We already validated the contents of CheckPrefixes and CommentPrefixes so
2344 // just concatenate them as alternatives.
2345 SmallString<32> PrefixRegexStr;
2346 for (size_t I = 0, E = Req.CheckPrefixes.size(); I != E; ++I) {
2347 if (I != 0)
2348 PrefixRegexStr.push_back('|');
2349 PrefixRegexStr.append(Req.CheckPrefixes[I]);
2350 }
2351 for (StringRef Prefix : Req.CommentPrefixes) {
2352 PrefixRegexStr.push_back('|');
2353 PrefixRegexStr.append(Prefix);
2354 }
2355
2356 return Regex(PrefixRegexStr);
2357 }
2358
defineCmdlineVariables(ArrayRef<StringRef> CmdlineDefines,SourceMgr & SM)2359 Error FileCheckPatternContext::defineCmdlineVariables(
2360 ArrayRef<StringRef> CmdlineDefines, SourceMgr &SM) {
2361 assert(GlobalVariableTable.empty() && GlobalNumericVariableTable.empty() &&
2362 "Overriding defined variable with command-line variable definitions");
2363
2364 if (CmdlineDefines.empty())
2365 return Error::success();
2366
2367 // Create a string representing the vector of command-line definitions. Each
2368 // definition is on its own line and prefixed with a definition number to
2369 // clarify which definition a given diagnostic corresponds to.
2370 unsigned I = 0;
2371 Error Errs = Error::success();
2372 std::string CmdlineDefsDiag;
2373 SmallVector<std::pair<size_t, size_t>, 4> CmdlineDefsIndices;
2374 for (StringRef CmdlineDef : CmdlineDefines) {
2375 std::string DefPrefix = ("Global define #" + Twine(++I) + ": ").str();
2376 size_t EqIdx = CmdlineDef.find('=');
2377 if (EqIdx == StringRef::npos) {
2378 CmdlineDefsIndices.push_back(std::make_pair(CmdlineDefsDiag.size(), 0));
2379 continue;
2380 }
2381 // Numeric variable definition.
2382 if (CmdlineDef[0] == '#') {
2383 // Append a copy of the command-line definition adapted to use the same
2384 // format as in the input file to be able to reuse
2385 // parseNumericSubstitutionBlock.
2386 CmdlineDefsDiag += (DefPrefix + CmdlineDef + " (parsed as: [[").str();
2387 std::string SubstitutionStr = std::string(CmdlineDef);
2388 SubstitutionStr[EqIdx] = ':';
2389 CmdlineDefsIndices.push_back(
2390 std::make_pair(CmdlineDefsDiag.size(), SubstitutionStr.size()));
2391 CmdlineDefsDiag += (SubstitutionStr + Twine("]])\n")).str();
2392 } else {
2393 CmdlineDefsDiag += DefPrefix;
2394 CmdlineDefsIndices.push_back(
2395 std::make_pair(CmdlineDefsDiag.size(), CmdlineDef.size()));
2396 CmdlineDefsDiag += (CmdlineDef + "\n").str();
2397 }
2398 }
2399
2400 // Create a buffer with fake command line content in order to display
2401 // parsing diagnostic with location information and point to the
2402 // global definition with invalid syntax.
2403 std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
2404 MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
2405 StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
2406 SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
2407
2408 for (std::pair<size_t, size_t> CmdlineDefIndices : CmdlineDefsIndices) {
2409 StringRef CmdlineDef = CmdlineDefsDiagRef.substr(CmdlineDefIndices.first,
2410 CmdlineDefIndices.second);
2411 if (CmdlineDef.empty()) {
2412 Errs = joinErrors(
2413 std::move(Errs),
2414 ErrorDiagnostic::get(SM, CmdlineDef,
2415 "missing equal sign in global definition"));
2416 continue;
2417 }
2418
2419 // Numeric variable definition.
2420 if (CmdlineDef[0] == '#') {
2421 // Now parse the definition both to check that the syntax is correct and
2422 // to create the necessary class instance.
2423 StringRef CmdlineDefExpr = CmdlineDef.substr(1);
2424 Optional<NumericVariable *> DefinedNumericVariable;
2425 Expected<std::unique_ptr<Expression>> ExpressionResult =
2426 Pattern::parseNumericSubstitutionBlock(
2427 CmdlineDefExpr, DefinedNumericVariable, false, None, this, SM);
2428 if (!ExpressionResult) {
2429 Errs = joinErrors(std::move(Errs), ExpressionResult.takeError());
2430 continue;
2431 }
2432 std::unique_ptr<Expression> Expression = std::move(*ExpressionResult);
2433 // Now evaluate the expression whose value this variable should be set
2434 // to, since the expression of a command-line variable definition should
2435 // only use variables defined earlier on the command-line. If not, this
2436 // is an error and we report it.
2437 Expected<ExpressionValue> Value = Expression->getAST()->eval();
2438 if (!Value) {
2439 Errs = joinErrors(std::move(Errs), Value.takeError());
2440 continue;
2441 }
2442
2443 assert(DefinedNumericVariable && "No variable defined");
2444 (*DefinedNumericVariable)->setValue(*Value);
2445
2446 // Record this variable definition.
2447 GlobalNumericVariableTable[(*DefinedNumericVariable)->getName()] =
2448 *DefinedNumericVariable;
2449 } else {
2450 // String variable definition.
2451 std::pair<StringRef, StringRef> CmdlineNameVal = CmdlineDef.split('=');
2452 StringRef CmdlineName = CmdlineNameVal.first;
2453 StringRef OrigCmdlineName = CmdlineName;
2454 Expected<Pattern::VariableProperties> ParseVarResult =
2455 Pattern::parseVariable(CmdlineName, SM);
2456 if (!ParseVarResult) {
2457 Errs = joinErrors(std::move(Errs), ParseVarResult.takeError());
2458 continue;
2459 }
2460 // Check that CmdlineName does not denote a pseudo variable is only
2461 // composed of the parsed numeric variable. This catches cases like
2462 // "FOO+2" in a "FOO+2=10" definition.
2463 if (ParseVarResult->IsPseudo || !CmdlineName.empty()) {
2464 Errs = joinErrors(std::move(Errs),
2465 ErrorDiagnostic::get(
2466 SM, OrigCmdlineName,
2467 "invalid name in string variable definition '" +
2468 OrigCmdlineName + "'"));
2469 continue;
2470 }
2471 StringRef Name = ParseVarResult->Name;
2472
2473 // Detect collisions between string and numeric variables when the former
2474 // is created later than the latter.
2475 if (GlobalNumericVariableTable.find(Name) !=
2476 GlobalNumericVariableTable.end()) {
2477 Errs = joinErrors(std::move(Errs),
2478 ErrorDiagnostic::get(SM, Name,
2479 "numeric variable with name '" +
2480 Name + "' already exists"));
2481 continue;
2482 }
2483 GlobalVariableTable.insert(CmdlineNameVal);
2484 // Mark the string variable as defined to detect collisions between
2485 // string and numeric variables in defineCmdlineVariables when the latter
2486 // is created later than the former. We cannot reuse GlobalVariableTable
2487 // for this by populating it with an empty string since we would then
2488 // lose the ability to detect the use of an undefined variable in
2489 // match().
2490 DefinedVariableTable[Name] = true;
2491 }
2492 }
2493
2494 return Errs;
2495 }
2496
clearLocalVars()2497 void FileCheckPatternContext::clearLocalVars() {
2498 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars;
2499 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable)
2500 if (Var.first()[0] != '$')
2501 LocalPatternVars.push_back(Var.first());
2502
2503 // Numeric substitution reads the value of a variable directly, not via
2504 // GlobalNumericVariableTable. Therefore, we clear local variables by
2505 // clearing their value which will lead to a numeric substitution failure. We
2506 // also mark the variable for removal from GlobalNumericVariableTable since
2507 // this is what defineCmdlineVariables checks to decide that no global
2508 // variable has been defined.
2509 for (const auto &Var : GlobalNumericVariableTable)
2510 if (Var.first()[0] != '$') {
2511 Var.getValue()->clearValue();
2512 LocalNumericVars.push_back(Var.first());
2513 }
2514
2515 for (const auto &Var : LocalPatternVars)
2516 GlobalVariableTable.erase(Var);
2517 for (const auto &Var : LocalNumericVars)
2518 GlobalNumericVariableTable.erase(Var);
2519 }
2520
checkInput(SourceMgr & SM,StringRef Buffer,std::vector<FileCheckDiag> * Diags)2521 bool FileCheck::checkInput(SourceMgr &SM, StringRef Buffer,
2522 std::vector<FileCheckDiag> *Diags) {
2523 bool ChecksFailed = false;
2524
2525 unsigned i = 0, j = 0, e = CheckStrings->size();
2526 while (true) {
2527 StringRef CheckRegion;
2528 if (j == e) {
2529 CheckRegion = Buffer;
2530 } else {
2531 const FileCheckString &CheckLabelStr = (*CheckStrings)[j];
2532 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
2533 ++j;
2534 continue;
2535 }
2536
2537 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
2538 size_t MatchLabelLen = 0;
2539 size_t MatchLabelPos =
2540 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags);
2541 if (MatchLabelPos == StringRef::npos)
2542 // Immediately bail if CHECK-LABEL fails, nothing else we can do.
2543 return false;
2544
2545 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
2546 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
2547 ++j;
2548 }
2549
2550 // Do not clear the first region as it's the one before the first
2551 // CHECK-LABEL and it would clear variables defined on the command-line
2552 // before they get used.
2553 if (i != 0 && Req.EnableVarScope)
2554 PatternContext->clearLocalVars();
2555
2556 for (; i != j; ++i) {
2557 const FileCheckString &CheckStr = (*CheckStrings)[i];
2558
2559 // Check each string within the scanned region, including a second check
2560 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
2561 size_t MatchLen = 0;
2562 size_t MatchPos =
2563 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags);
2564
2565 if (MatchPos == StringRef::npos) {
2566 ChecksFailed = true;
2567 i = j;
2568 break;
2569 }
2570
2571 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
2572 }
2573
2574 if (j == e)
2575 break;
2576 }
2577
2578 // Success if no checks failed.
2579 return !ChecksFailed;
2580 }
2581