1 //===- YAMLRemarkParser.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides utility methods used by clients that want to use the
10 // parser for remark diagnostics in LLVM.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "YAMLRemarkParser.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringSwitch.h"
17 #include "llvm/Support/Endian.h"
18 #include "llvm/Support/Path.h"
19 #include <optional>
20 
21 using namespace llvm;
22 using namespace llvm::remarks;
23 
24 char YAMLParseError::ID = 0;
25 
26 static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) {
27   assert(Ctx && "Expected non-null Ctx in diagnostic handler.");
28   std::string &Message = *static_cast<std::string *>(Ctx);
29   assert(Message.empty() && "Expected an empty string.");
30   raw_string_ostream OS(Message);
31   Diag.print(/*ProgName=*/nullptr, OS, /*ShowColors*/ false,
32              /*ShowKindLabels*/ true);
33   OS << '\n';
34   OS.flush();
35 }
36 
37 YAMLParseError::YAMLParseError(StringRef Msg, SourceMgr &SM,
38                                yaml::Stream &Stream, yaml::Node &Node) {
39   // 1) Set up a diagnostic handler to avoid errors being printed out to
40   // stderr.
41   // 2) Use the stream to print the error with the associated node.
42   // 3) The stream will use the source manager to print the error, which will
43   // call the diagnostic handler.
44   // 4) The diagnostic handler will stream the error directly into this object's
45   // Message member, which is used when logging is asked for.
46   auto OldDiagHandler = SM.getDiagHandler();
47   auto OldDiagCtx = SM.getDiagContext();
48   SM.setDiagHandler(handleDiagnostic, &Message);
49   Stream.printError(&Node, Twine(Msg) + Twine('\n'));
50   // Restore the old handlers.
51   SM.setDiagHandler(OldDiagHandler, OldDiagCtx);
52 }
53 
54 static SourceMgr setupSM(std::string &LastErrorMessage) {
55   SourceMgr SM;
56   SM.setDiagHandler(handleDiagnostic, &LastErrorMessage);
57   return SM;
58 }
59 
60 // Parse the magic number. This function returns true if this represents remark
61 // metadata, false otherwise.
62 static Expected<bool> parseMagic(StringRef &Buf) {
63   if (!Buf.consume_front(remarks::Magic))
64     return false;
65 
66   if (Buf.size() < 1 || !Buf.consume_front(StringRef("\0", 1)))
67     return createStringError(std::errc::illegal_byte_sequence,
68                              "Expecting \\0 after magic number.");
69   return true;
70 }
71 
72 static Expected<uint64_t> parseVersion(StringRef &Buf) {
73   if (Buf.size() < sizeof(uint64_t))
74     return createStringError(std::errc::illegal_byte_sequence,
75                              "Expecting version number.");
76 
77   uint64_t Version =
78       support::endian::read<uint64_t, support::little, support::unaligned>(
79           Buf.data());
80   if (Version != remarks::CurrentRemarkVersion)
81     return createStringError(std::errc::illegal_byte_sequence,
82                              "Mismatching remark version. Got %" PRId64
83                              ", expected %" PRId64 ".",
84                              Version, remarks::CurrentRemarkVersion);
85   Buf = Buf.drop_front(sizeof(uint64_t));
86   return Version;
87 }
88 
89 static Expected<uint64_t> parseStrTabSize(StringRef &Buf) {
90   if (Buf.size() < sizeof(uint64_t))
91     return createStringError(std::errc::illegal_byte_sequence,
92                              "Expecting string table size.");
93   uint64_t StrTabSize =
94       support::endian::read<uint64_t, support::little, support::unaligned>(
95           Buf.data());
96   Buf = Buf.drop_front(sizeof(uint64_t));
97   return StrTabSize;
98 }
99 
100 static Expected<ParsedStringTable> parseStrTab(StringRef &Buf,
101                                                uint64_t StrTabSize) {
102   if (Buf.size() < StrTabSize)
103     return createStringError(std::errc::illegal_byte_sequence,
104                              "Expecting string table.");
105 
106   // Attach the string table to the parser.
107   ParsedStringTable Result(StringRef(Buf.data(), StrTabSize));
108   Buf = Buf.drop_front(StrTabSize);
109   return Expected<ParsedStringTable>(std::move(Result));
110 }
111 
112 Expected<std::unique_ptr<YAMLRemarkParser>> remarks::createYAMLParserFromMeta(
113     StringRef Buf, std::optional<ParsedStringTable> StrTab,
114     std::optional<StringRef> ExternalFilePrependPath) {
115   // We now have a magic number. The metadata has to be correct.
116   Expected<bool> isMeta = parseMagic(Buf);
117   if (!isMeta)
118     return isMeta.takeError();
119   // If it's not recognized as metadata, roll back.
120   std::unique_ptr<MemoryBuffer> SeparateBuf;
121   if (*isMeta) {
122     Expected<uint64_t> Version = parseVersion(Buf);
123     if (!Version)
124       return Version.takeError();
125 
126     Expected<uint64_t> StrTabSize = parseStrTabSize(Buf);
127     if (!StrTabSize)
128       return StrTabSize.takeError();
129 
130     // If the size of string table is not 0, try to build one.
131     if (*StrTabSize != 0) {
132       if (StrTab)
133         return createStringError(std::errc::illegal_byte_sequence,
134                                  "String table already provided.");
135       Expected<ParsedStringTable> MaybeStrTab = parseStrTab(Buf, *StrTabSize);
136       if (!MaybeStrTab)
137         return MaybeStrTab.takeError();
138       StrTab = std::move(*MaybeStrTab);
139     }
140     // If it starts with "---", there is no external file.
141     if (!Buf.startswith("---")) {
142       // At this point, we expect Buf to contain the external file path.
143       StringRef ExternalFilePath = Buf;
144       SmallString<80> FullPath;
145       if (ExternalFilePrependPath)
146         FullPath = *ExternalFilePrependPath;
147       sys::path::append(FullPath, ExternalFilePath);
148 
149       // Try to open the file and start parsing from there.
150       ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
151           MemoryBuffer::getFile(FullPath);
152       if (std::error_code EC = BufferOrErr.getError())
153         return createFileError(FullPath, EC);
154 
155       // Keep the buffer alive.
156       SeparateBuf = std::move(*BufferOrErr);
157       Buf = SeparateBuf->getBuffer();
158     }
159   }
160 
161   std::unique_ptr<YAMLRemarkParser> Result =
162       StrTab
163           ? std::make_unique<YAMLStrTabRemarkParser>(Buf, std::move(*StrTab))
164           : std::make_unique<YAMLRemarkParser>(Buf);
165   if (SeparateBuf)
166     Result->SeparateBuf = std::move(SeparateBuf);
167   return std::move(Result);
168 }
169 
170 YAMLRemarkParser::YAMLRemarkParser(StringRef Buf)
171     : YAMLRemarkParser(Buf, std::nullopt) {}
172 
173 YAMLRemarkParser::YAMLRemarkParser(StringRef Buf,
174                                    std::optional<ParsedStringTable> StrTab)
175     : RemarkParser{Format::YAML}, StrTab(std::move(StrTab)),
176       SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {}
177 
178 Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) {
179   return make_error<YAMLParseError>(Message, SM, Stream, Node);
180 }
181 
182 Error YAMLRemarkParser::error() {
183   if (LastErrorMessage.empty())
184     return Error::success();
185   Error E = make_error<YAMLParseError>(LastErrorMessage);
186   LastErrorMessage.clear();
187   return E;
188 }
189 
190 Expected<std::unique_ptr<Remark>>
191 YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) {
192   if (Error E = error())
193     return std::move(E);
194 
195   yaml::Node *YAMLRoot = RemarkEntry.getRoot();
196   if (!YAMLRoot) {
197     return createStringError(std::make_error_code(std::errc::invalid_argument),
198                              "not a valid YAML file.");
199   }
200 
201   auto *Root = dyn_cast<yaml::MappingNode>(YAMLRoot);
202   if (!Root)
203     return error("document root is not of mapping type.", *YAMLRoot);
204 
205   std::unique_ptr<Remark> Result = std::make_unique<Remark>();
206   Remark &TheRemark = *Result;
207 
208   // First, the type. It needs special handling since is not part of the
209   // key-value stream.
210   Expected<Type> T = parseType(*Root);
211   if (!T)
212     return T.takeError();
213   else
214     TheRemark.RemarkType = *T;
215 
216   // Then, parse the fields, one by one.
217   for (yaml::KeyValueNode &RemarkField : *Root) {
218     Expected<StringRef> MaybeKey = parseKey(RemarkField);
219     if (!MaybeKey)
220       return MaybeKey.takeError();
221     StringRef KeyName = *MaybeKey;
222 
223     if (KeyName == "Pass") {
224       if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
225         TheRemark.PassName = *MaybeStr;
226       else
227         return MaybeStr.takeError();
228     } else if (KeyName == "Name") {
229       if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
230         TheRemark.RemarkName = *MaybeStr;
231       else
232         return MaybeStr.takeError();
233     } else if (KeyName == "Function") {
234       if (Expected<StringRef> MaybeStr = parseStr(RemarkField))
235         TheRemark.FunctionName = *MaybeStr;
236       else
237         return MaybeStr.takeError();
238     } else if (KeyName == "Hotness") {
239       if (Expected<unsigned> MaybeU = parseUnsigned(RemarkField))
240         TheRemark.Hotness = *MaybeU;
241       else
242         return MaybeU.takeError();
243     } else if (KeyName == "DebugLoc") {
244       if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(RemarkField))
245         TheRemark.Loc = *MaybeLoc;
246       else
247         return MaybeLoc.takeError();
248     } else if (KeyName == "Args") {
249       auto *Args = dyn_cast<yaml::SequenceNode>(RemarkField.getValue());
250       if (!Args)
251         return error("wrong value type for key.", RemarkField);
252 
253       for (yaml::Node &Arg : *Args) {
254         if (Expected<Argument> MaybeArg = parseArg(Arg))
255           TheRemark.Args.push_back(*MaybeArg);
256         else
257           return MaybeArg.takeError();
258       }
259     } else {
260       return error("unknown key.", RemarkField);
261     }
262   }
263 
264   // Check if any of the mandatory fields are missing.
265   if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() ||
266       TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty())
267     return error("Type, Pass, Name or Function missing.",
268                  *RemarkEntry.getRoot());
269 
270   return std::move(Result);
271 }
272 
273 Expected<Type> YAMLRemarkParser::parseType(yaml::MappingNode &Node) {
274   auto Type = StringSwitch<remarks::Type>(Node.getRawTag())
275                   .Case("!Passed", remarks::Type::Passed)
276                   .Case("!Missed", remarks::Type::Missed)
277                   .Case("!Analysis", remarks::Type::Analysis)
278                   .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute)
279                   .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing)
280                   .Case("!Failure", remarks::Type::Failure)
281                   .Default(remarks::Type::Unknown);
282   if (Type == remarks::Type::Unknown)
283     return error("expected a remark tag.", Node);
284   return Type;
285 }
286 
287 Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
288   if (auto *Key = dyn_cast<yaml::ScalarNode>(Node.getKey()))
289     return Key->getRawValue();
290 
291   return error("key is not a string.", Node);
292 }
293 
294 Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
295   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
296   yaml::BlockScalarNode *ValueBlock;
297   StringRef Result;
298   if (!Value) {
299     // Try to parse the value as a block node.
300     ValueBlock = dyn_cast<yaml::BlockScalarNode>(Node.getValue());
301     if (!ValueBlock)
302       return error("expected a value of scalar type.", Node);
303     Result = ValueBlock->getValue();
304   } else
305     Result = Value->getRawValue();
306 
307   if (Result.front() == '\'')
308     Result = Result.drop_front();
309 
310   if (Result.back() == '\'')
311     Result = Result.drop_back();
312 
313   return Result;
314 }
315 
316 Expected<unsigned> YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) {
317   SmallVector<char, 4> Tmp;
318   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
319   if (!Value)
320     return error("expected a value of scalar type.", Node);
321   unsigned UnsignedValue = 0;
322   if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue))
323     return error("expected a value of integer type.", *Value);
324   return UnsignedValue;
325 }
326 
327 Expected<RemarkLocation>
328 YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) {
329   auto *DebugLoc = dyn_cast<yaml::MappingNode>(Node.getValue());
330   if (!DebugLoc)
331     return error("expected a value of mapping type.", Node);
332 
333   std::optional<StringRef> File;
334   std::optional<unsigned> Line;
335   std::optional<unsigned> Column;
336 
337   for (yaml::KeyValueNode &DLNode : *DebugLoc) {
338     Expected<StringRef> MaybeKey = parseKey(DLNode);
339     if (!MaybeKey)
340       return MaybeKey.takeError();
341     StringRef KeyName = *MaybeKey;
342 
343     if (KeyName == "File") {
344       if (Expected<StringRef> MaybeStr = parseStr(DLNode))
345         File = *MaybeStr;
346       else
347         return MaybeStr.takeError();
348     } else if (KeyName == "Column") {
349       if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
350         Column = *MaybeU;
351       else
352         return MaybeU.takeError();
353     } else if (KeyName == "Line") {
354       if (Expected<unsigned> MaybeU = parseUnsigned(DLNode))
355         Line = *MaybeU;
356       else
357         return MaybeU.takeError();
358     } else {
359       return error("unknown entry in DebugLoc map.", DLNode);
360     }
361   }
362 
363   // If any of the debug loc fields is missing, return an error.
364   if (!File || !Line || !Column)
365     return error("DebugLoc node incomplete.", Node);
366 
367   return RemarkLocation{*File, *Line, *Column};
368 }
369 
370 Expected<Argument> YAMLRemarkParser::parseArg(yaml::Node &Node) {
371   auto *ArgMap = dyn_cast<yaml::MappingNode>(&Node);
372   if (!ArgMap)
373     return error("expected a value of mapping type.", Node);
374 
375   std::optional<StringRef> KeyStr;
376   std::optional<StringRef> ValueStr;
377   std::optional<RemarkLocation> Loc;
378 
379   for (yaml::KeyValueNode &ArgEntry : *ArgMap) {
380     Expected<StringRef> MaybeKey = parseKey(ArgEntry);
381     if (!MaybeKey)
382       return MaybeKey.takeError();
383     StringRef KeyName = *MaybeKey;
384 
385     // Try to parse debug locs.
386     if (KeyName == "DebugLoc") {
387       // Can't have multiple DebugLoc entries per argument.
388       if (Loc)
389         return error("only one DebugLoc entry is allowed per argument.",
390                      ArgEntry);
391 
392       if (Expected<RemarkLocation> MaybeLoc = parseDebugLoc(ArgEntry)) {
393         Loc = *MaybeLoc;
394         continue;
395       } else
396         return MaybeLoc.takeError();
397     }
398 
399     // If we already have a string, error out.
400     if (ValueStr)
401       return error("only one string entry is allowed per argument.", ArgEntry);
402 
403     // Try to parse the value.
404     if (Expected<StringRef> MaybeStr = parseStr(ArgEntry))
405       ValueStr = *MaybeStr;
406     else
407       return MaybeStr.takeError();
408 
409     // Keep the key from the string.
410     KeyStr = KeyName;
411   }
412 
413   if (!KeyStr)
414     return error("argument key is missing.", *ArgMap);
415   if (!ValueStr)
416     return error("argument value is missing.", *ArgMap);
417 
418   return Argument{*KeyStr, *ValueStr, Loc};
419 }
420 
421 Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
422   if (YAMLIt == Stream.end())
423     return make_error<EndOfFileError>();
424 
425   Expected<std::unique_ptr<Remark>> MaybeResult = parseRemark(*YAMLIt);
426   if (!MaybeResult) {
427     // Avoid garbage input, set the iterator to the end.
428     YAMLIt = Stream.end();
429     return MaybeResult.takeError();
430   }
431 
432   ++YAMLIt;
433 
434   return std::move(*MaybeResult);
435 }
436 
437 Expected<StringRef> YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) {
438   auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
439   yaml::BlockScalarNode *ValueBlock;
440   StringRef Result;
441   if (!Value) {
442     // Try to parse the value as a block node.
443     ValueBlock = dyn_cast<yaml::BlockScalarNode>(Node.getValue());
444     if (!ValueBlock)
445       return error("expected a value of scalar type.", Node);
446     Result = ValueBlock->getValue();
447   } else
448     Result = Value->getRawValue();
449   // If we have a string table, parse it as an unsigned.
450   unsigned StrID = 0;
451   if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
452     StrID = *MaybeStrID;
453   else
454     return MaybeStrID.takeError();
455 
456   if (Expected<StringRef> Str = (*StrTab)[StrID])
457     Result = *Str;
458   else
459     return Str.takeError();
460 
461   if (Result.front() == '\'')
462     Result = Result.drop_front();
463 
464   if (Result.back() == '\'')
465     Result = Result.drop_back();
466 
467   return Result;
468 }
469