1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SourceMgr class.  This class is used as a simple
10 // substrate for diagnostics, #include handling, and other low level things for
11 // simple parsers.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Support/SourceMgr.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Support/ErrorOr.h"
22 #include "llvm/Support/Locale.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/SMLoc.h"
26 #include "llvm/Support/WithColor.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <limits>
32 #include <memory>
33 #include <string>
34 #include <utility>
35 
36 using namespace llvm;
37 
// Width, in columns, of a tab stop. Used when expanding tabs in the printed
// source line and when padding the caret / fix-it lines underneath it.
static constexpr size_t TabStop = 8;
39 
40 unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
41                                    SMLoc IncludeLoc,
42                                    std::string &IncludedFile) {
43   IncludedFile = Filename;
44   ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
45       MemoryBuffer::getFile(IncludedFile);
46 
47   // If the file didn't exist directly, see if it's in an include path.
48   for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
49        ++i) {
50     IncludedFile =
51         IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
52     NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
53   }
54 
55   if (!NewBufOrErr)
56     return 0;
57 
58   return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
59 }
60 
61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
62   for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
63     if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
64         // Use <= here so that a pointer to the null at the end of the buffer
65         // is included as part of the buffer.
66         Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
67       return i + 1;
68   return 0;
69 }
70 
71 template <typename T>
72 static std::vector<T> &GetOrCreateOffsetCache(void *&OffsetCache,
73                                               MemoryBuffer *Buffer) {
74   if (OffsetCache)
75     return *static_cast<std::vector<T> *>(OffsetCache);
76 
77   // Lazily fill in the offset cache.
78   auto *Offsets = new std::vector<T>();
79   size_t Sz = Buffer->getBufferSize();
80   assert(Sz <= std::numeric_limits<T>::max());
81   StringRef S = Buffer->getBuffer();
82   for (size_t N = 0; N < Sz; ++N) {
83     if (S[N] == '\n')
84       Offsets->push_back(static_cast<T>(N));
85   }
86 
87   OffsetCache = Offsets;
88   return *Offsets;
89 }
90 
91 template <typename T>
92 unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const {
93   std::vector<T> &Offsets =
94       GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
95 
96   const char *BufStart = Buffer->getBufferStart();
97   assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
98   ptrdiff_t PtrDiff = Ptr - BufStart;
99   assert(PtrDiff >= 0 &&
100          static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
101   T PtrOffset = static_cast<T>(PtrDiff);
102 
103   // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
104   // the line number.
105   return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1;
106 }
107 
108 /// Look up a given \p Ptr in in the buffer, determining which line it came
109 /// from.
110 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
111   size_t Sz = Buffer->getBufferSize();
112   if (Sz <= std::numeric_limits<uint8_t>::max())
113     return getLineNumberSpecialized<uint8_t>(Ptr);
114   else if (Sz <= std::numeric_limits<uint16_t>::max())
115     return getLineNumberSpecialized<uint16_t>(Ptr);
116   else if (Sz <= std::numeric_limits<uint32_t>::max())
117     return getLineNumberSpecialized<uint32_t>(Ptr);
118   else
119     return getLineNumberSpecialized<uint64_t>(Ptr);
120 }
121 
122 template <typename T>
123 const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
124     unsigned LineNo) const {
125   std::vector<T> &Offsets =
126       GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
127 
128   // We start counting line and column numbers from 1.
129   if (LineNo != 0)
130     --LineNo;
131 
132   const char *BufStart = Buffer->getBufferStart();
133 
134   // The offset cache contains the location of the \n for the specified line,
135   // we want the start of the line.  As such, we look for the previous entry.
136   if (LineNo == 0)
137     return BufStart;
138   if (LineNo > Offsets.size())
139     return nullptr;
140   return BufStart + Offsets[LineNo - 1] + 1;
141 }
142 
143 /// Return a pointer to the first character of the specified line number or
144 /// null if the line number is invalid.
145 const char *
146 SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const {
147   size_t Sz = Buffer->getBufferSize();
148   if (Sz <= std::numeric_limits<uint8_t>::max())
149     return getPointerForLineNumberSpecialized<uint8_t>(LineNo);
150   else if (Sz <= std::numeric_limits<uint16_t>::max())
151     return getPointerForLineNumberSpecialized<uint16_t>(LineNo);
152   else if (Sz <= std::numeric_limits<uint32_t>::max())
153     return getPointerForLineNumberSpecialized<uint32_t>(LineNo);
154   else
155     return getPointerForLineNumberSpecialized<uint64_t>(LineNo);
156 }
157 
158 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
159     : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache),
160       IncludeLoc(Other.IncludeLoc) {
161   Other.OffsetCache = nullptr;
162 }
163 
164 SourceMgr::SrcBuffer::~SrcBuffer() {
165   if (OffsetCache) {
166     size_t Sz = Buffer->getBufferSize();
167     if (Sz <= std::numeric_limits<uint8_t>::max())
168       delete static_cast<std::vector<uint8_t> *>(OffsetCache);
169     else if (Sz <= std::numeric_limits<uint16_t>::max())
170       delete static_cast<std::vector<uint16_t> *>(OffsetCache);
171     else if (Sz <= std::numeric_limits<uint32_t>::max())
172       delete static_cast<std::vector<uint32_t> *>(OffsetCache);
173     else
174       delete static_cast<std::vector<uint64_t> *>(OffsetCache);
175     OffsetCache = nullptr;
176   }
177 }
178 
179 std::pair<unsigned, unsigned>
180 SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
181   if (!BufferID)
182     BufferID = FindBufferContainingLoc(Loc);
183   assert(BufferID && "Invalid Location!");
184 
185   auto &SB = getBufferInfo(BufferID);
186   const char *Ptr = Loc.getPointer();
187 
188   unsigned LineNo = SB.getLineNumber(Ptr);
189   const char *BufStart = SB.Buffer->getBufferStart();
190   size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r");
191   if (NewlineOffs == StringRef::npos)
192     NewlineOffs = ~(size_t)0;
193   return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs);
194 }
195 
196 /// Given a line and column number in a mapped buffer, turn it into an SMLoc.
197 /// This will return a null SMLoc if the line/column location is invalid.
198 SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo,
199                                          unsigned ColNo) {
200   auto &SB = getBufferInfo(BufferID);
201   const char *Ptr = SB.getPointerForLineNumber(LineNo);
202   if (!Ptr)
203     return SMLoc();
204 
205   // We start counting line and column numbers from 1.
206   if (ColNo != 0)
207     --ColNo;
208 
209   // If we have a column number, validate it.
210   if (ColNo) {
211     // Make sure the location is within the current line.
212     if (Ptr + ColNo > SB.Buffer->getBufferEnd())
213       return SMLoc();
214 
215     // Make sure there is no newline in the way.
216     if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos)
217       return SMLoc();
218 
219     Ptr += ColNo;
220   }
221 
222   return SMLoc::getFromPointer(Ptr);
223 }
224 
225 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
226   if (IncludeLoc == SMLoc())
227     return; // Top of stack.
228 
229   unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
230   assert(CurBuf && "Invalid or unspecified location!");
231 
232   PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
233 
234   OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
235      << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
236 }
237 
238 SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
239                                    const Twine &Msg, ArrayRef<SMRange> Ranges,
240                                    ArrayRef<SMFixIt> FixIts) const {
241   // First thing to do: find the current buffer containing the specified
242   // location to pull out the source line.
243   SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
244   std::pair<unsigned, unsigned> LineAndCol;
245   StringRef BufferID = "<unknown>";
246   std::string LineStr;
247 
248   if (Loc.isValid()) {
249     unsigned CurBuf = FindBufferContainingLoc(Loc);
250     assert(CurBuf && "Invalid or unspecified location!");
251 
252     const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
253     BufferID = CurMB->getBufferIdentifier();
254 
255     // Scan backward to find the start of the line.
256     const char *LineStart = Loc.getPointer();
257     const char *BufStart = CurMB->getBufferStart();
258     while (LineStart != BufStart && LineStart[-1] != '\n' &&
259            LineStart[-1] != '\r')
260       --LineStart;
261 
262     // Get the end of the line.
263     const char *LineEnd = Loc.getPointer();
264     const char *BufEnd = CurMB->getBufferEnd();
265     while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
266       ++LineEnd;
267     LineStr = std::string(LineStart, LineEnd);
268 
269     // Convert any ranges to column ranges that only intersect the line of the
270     // location.
271     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
272       SMRange R = Ranges[i];
273       if (!R.isValid())
274         continue;
275 
276       // If the line doesn't contain any part of the range, then ignore it.
277       if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
278         continue;
279 
280       // Ignore pieces of the range that go onto other lines.
281       if (R.Start.getPointer() < LineStart)
282         R.Start = SMLoc::getFromPointer(LineStart);
283       if (R.End.getPointer() > LineEnd)
284         R.End = SMLoc::getFromPointer(LineEnd);
285 
286       // Translate from SMLoc ranges to column ranges.
287       // FIXME: Handle multibyte characters.
288       ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart,
289                                          R.End.getPointer() - LineStart));
290     }
291 
292     LineAndCol = getLineAndColumn(Loc, CurBuf);
293   }
294 
295   return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
296                       LineAndCol.second - 1, Kind, Msg.str(), LineStr,
297                       ColRanges, FixIts);
298 }
299 
300 void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
301                              bool ShowColors) const {
302   // Report the message with the diagnostic handler if present.
303   if (DiagHandler) {
304     DiagHandler(Diagnostic, DiagContext);
305     return;
306   }
307 
308   if (Diagnostic.getLoc().isValid()) {
309     unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
310     assert(CurBuf && "Invalid or unspecified location!");
311     PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
312   }
313 
314   Diagnostic.print(nullptr, OS, ShowColors);
315 }
316 
317 void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
318                              SourceMgr::DiagKind Kind, const Twine &Msg,
319                              ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts,
320                              bool ShowColors) const {
321   PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
322 }
323 
324 void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
325                              const Twine &Msg, ArrayRef<SMRange> Ranges,
326                              ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
327   PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
328 }
329 
330 //===----------------------------------------------------------------------===//
331 // SMFixIt Implementation
332 //===----------------------------------------------------------------------===//
333 
334 SMFixIt::SMFixIt(SMRange R, const Twine &Replacement)
335     : Range(R), Text(Replacement.str()) {
336   assert(R.isValid());
337 }
338 
339 //===----------------------------------------------------------------------===//
340 // SMDiagnostic Implementation
341 //===----------------------------------------------------------------------===//
342 
343 SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line,
344                            int Col, SourceMgr::DiagKind Kind, StringRef Msg,
345                            StringRef LineStr,
346                            ArrayRef<std::pair<unsigned, unsigned>> Ranges,
347                            ArrayRef<SMFixIt> Hints)
348     : SM(&sm), Loc(L), Filename(std::string(FN)), LineNo(Line), ColumnNo(Col),
349       Kind(Kind), Message(std::string(Msg)), LineContents(std::string(LineStr)),
350       Ranges(Ranges.vec()), FixIts(Hints.begin(), Hints.end()) {
351   llvm::sort(FixIts);
352 }
353 
354 static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
355                            ArrayRef<SMFixIt> FixIts,
356                            ArrayRef<char> SourceLine) {
357   if (FixIts.empty())
358     return;
359 
360   const char *LineStart = SourceLine.begin();
361   const char *LineEnd = SourceLine.end();
362 
363   size_t PrevHintEndCol = 0;
364 
365   for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); I != E;
366        ++I) {
367     // If the fixit contains a newline or tab, ignore it.
368     if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
369       continue;
370 
371     SMRange R = I->getRange();
372 
373     // If the line doesn't contain any part of the range, then ignore it.
374     if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
375       continue;
376 
377     // Translate from SMLoc to column.
378     // Ignore pieces of the range that go onto other lines.
379     // FIXME: Handle multibyte characters in the source line.
380     unsigned FirstCol;
381     if (R.Start.getPointer() < LineStart)
382       FirstCol = 0;
383     else
384       FirstCol = R.Start.getPointer() - LineStart;
385 
386     // If we inserted a long previous hint, push this one forwards, and add
387     // an extra space to show that this is not part of the previous
388     // completion. This is sort of the best we can do when two hints appear
389     // to overlap.
390     //
391     // Note that if this hint is located immediately after the previous
392     // hint, no space will be added, since the location is more important.
393     unsigned HintCol = FirstCol;
394     if (HintCol < PrevHintEndCol)
395       HintCol = PrevHintEndCol + 1;
396 
397     // FIXME: This assertion is intended to catch unintended use of multibyte
398     // characters in fixits. If we decide to do this, we'll have to track
399     // separate byte widths for the source and fixit lines.
400     assert((size_t)sys::locale::columnWidth(I->getText()) ==
401            I->getText().size());
402 
403     // This relies on one byte per column in our fixit hints.
404     unsigned LastColumnModified = HintCol + I->getText().size();
405     if (LastColumnModified > FixItLine.size())
406       FixItLine.resize(LastColumnModified, ' ');
407 
408     std::copy(I->getText().begin(), I->getText().end(),
409               FixItLine.begin() + HintCol);
410 
411     PrevHintEndCol = LastColumnModified;
412 
413     // For replacements, mark the removal range with '~'.
414     // FIXME: Handle multibyte characters in the source line.
415     unsigned LastCol;
416     if (R.End.getPointer() >= LineEnd)
417       LastCol = LineEnd - LineStart;
418     else
419       LastCol = R.End.getPointer() - LineStart;
420 
421     std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
422   }
423 }
424 
425 static void printSourceLine(raw_ostream &S, StringRef LineContents) {
426   // Print out the source line one character at a time, so we can expand tabs.
427   for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
428     size_t NextTab = LineContents.find('\t', i);
429     // If there were no tabs left, print the rest, we are done.
430     if (NextTab == StringRef::npos) {
431       S << LineContents.drop_front(i);
432       break;
433     }
434 
435     // Otherwise, print from i to NextTab.
436     S << LineContents.slice(i, NextTab);
437     OutCol += NextTab - i;
438     i = NextTab;
439 
440     // If we have a tab, emit at least one space, then round up to 8 columns.
441     do {
442       S << ' ';
443       ++OutCol;
444     } while ((OutCol % TabStop) != 0);
445   }
446   S << '\n';
447 }
448 
/// Return true if the high bit of \p c is set, i.e. the byte is outside the
/// 7-bit ASCII range.
static bool isNonASCII(char c) {
  return (static_cast<unsigned char>(c) & 0x80u) != 0;
}
450 
451 void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors,
452                          bool ShowKindLabel) const {
453   ColorMode Mode = ShowColors ? ColorMode::Auto : ColorMode::Disable;
454 
455   {
456     WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, Mode);
457 
458     if (ProgName && ProgName[0])
459       S << ProgName << ": ";
460 
461     if (!Filename.empty()) {
462       if (Filename == "-")
463         S << "<stdin>";
464       else
465         S << Filename;
466 
467       if (LineNo != -1) {
468         S << ':' << LineNo;
469         if (ColumnNo != -1)
470           S << ':' << (ColumnNo + 1);
471       }
472       S << ": ";
473     }
474   }
475 
476   if (ShowKindLabel) {
477     switch (Kind) {
478     case SourceMgr::DK_Error:
479       WithColor::error(OS, "", !ShowColors);
480       break;
481     case SourceMgr::DK_Warning:
482       WithColor::warning(OS, "", !ShowColors);
483       break;
484     case SourceMgr::DK_Note:
485       WithColor::note(OS, "", !ShowColors);
486       break;
487     case SourceMgr::DK_Remark:
488       WithColor::remark(OS, "", !ShowColors);
489       break;
490     }
491   }
492 
493   WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, Mode) << Message << '\n';
494 
495   if (LineNo == -1 || ColumnNo == -1)
496     return;
497 
498   // FIXME: If there are multibyte or multi-column characters in the source, all
499   // our ranges will be wrong. To do this properly, we'll need a byte-to-column
500   // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
501   // expanding them later, and bail out rather than show incorrect ranges and
502   // misaligned fixits for any other odd characters.
503   if (find_if(LineContents, isNonASCII) != LineContents.end()) {
504     printSourceLine(OS, LineContents);
505     return;
506   }
507   size_t NumColumns = LineContents.size();
508 
509   // Build the line with the caret and ranges.
510   std::string CaretLine(NumColumns + 1, ' ');
511 
512   // Expand any ranges.
513   for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
514     std::pair<unsigned, unsigned> R = Ranges[r];
515     std::fill(&CaretLine[R.first],
516               &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~');
517   }
518 
519   // Add any fix-its.
520   // FIXME: Find the beginning of the line properly for multibyte characters.
521   std::string FixItInsertionLine;
522   buildFixItLine(
523       CaretLine, FixItInsertionLine, FixIts,
524       makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size()));
525 
526   // Finally, plop on the caret.
527   if (unsigned(ColumnNo) <= NumColumns)
528     CaretLine[ColumnNo] = '^';
529   else
530     CaretLine[NumColumns] = '^';
531 
532   // ... and remove trailing whitespace so the output doesn't wrap for it.  We
533   // know that the line isn't completely empty because it has the caret in it at
534   // least.
535   CaretLine.erase(CaretLine.find_last_not_of(' ') + 1);
536 
537   printSourceLine(OS, LineContents);
538 
539   {
540     ColorMode Mode = ShowColors ? ColorMode::Auto : ColorMode::Disable;
541     WithColor S(OS, raw_ostream::GREEN, true, false, Mode);
542 
543     // Print out the caret line, matching tabs in the source line.
544     for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
545       if (i >= LineContents.size() || LineContents[i] != '\t') {
546         S << CaretLine[i];
547         ++OutCol;
548         continue;
549       }
550 
551       // Okay, we have a tab.  Insert the appropriate number of characters.
552       do {
553         S << CaretLine[i];
554         ++OutCol;
555       } while ((OutCol % TabStop) != 0);
556     }
557     S << '\n';
558   }
559 
560   // Print out the replacement line, matching tabs in the source line.
561   if (FixItInsertionLine.empty())
562     return;
563 
564   for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
565     if (i >= LineContents.size() || LineContents[i] != '\t') {
566       OS << FixItInsertionLine[i];
567       ++OutCol;
568       continue;
569     }
570 
571     // Okay, we have a tab.  Insert the appropriate number of characters.
572     do {
573       OS << FixItInsertionLine[i];
574       // FIXME: This is trying not to break up replacements, but then to re-sync
575       // with the tabs between replacements. This will fail, though, if two
576       // fix-it replacements are exactly adjacent, or if a fix-it contains a
577       // space. Really we should be precomputing column widths, which we'll
578       // need anyway for multibyte chars.
579       if (FixItInsertionLine[i] != ' ')
580         ++i;
581       ++OutCol;
582     } while (((OutCol % TabStop) != 0) && i != e);
583   }
584   OS << '\n';
585 }
586