1 //===-- FileSpec.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <algorithm>
24 #include <optional>
25 #include <system_error>
26 #include <vector>
27 
28 #include <cassert>
29 #include <climits>
30 #include <cstdio>
31 #include <cstring>
32 
33 using namespace lldb;
34 using namespace lldb_private;
35 
36 namespace {
37 
38 static constexpr FileSpec::Style GetNativeStyle() {
39 #if defined(_WIN32)
40   return FileSpec::Style::windows;
41 #else
42   return FileSpec::Style::posix;
43 #endif
44 }
45 
46 bool PathStyleIsPosix(FileSpec::Style style) {
47   return llvm::sys::path::is_style_posix(style);
48 }
49 
50 const char *GetPathSeparators(FileSpec::Style style) {
51   return llvm::sys::path::get_separator(style).data();
52 }
53 
54 char GetPreferredPathSeparator(FileSpec::Style style) {
55   return GetPathSeparators(style)[0];
56 }
57 
58 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
59   if (PathStyleIsPosix(style))
60     return;
61 
62   std::replace(path.begin(), path.end(), '/', '\\');
63 }
64 
65 } // end anonymous namespace
66 
67 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
68 
69 // Default constructor that can take an optional full path to a file on disk.
70 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
71   SetFile(path, style);
72 }
73 
74 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
75     : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
76 
77 namespace {
78 /// Safely get a character at the specified index.
79 ///
80 /// \param[in] path
81 ///     A full, partial, or relative path to a file.
82 ///
83 /// \param[in] i
84 ///     An index into path which may or may not be valid.
85 ///
86 /// \return
87 ///   The character at index \a i if the index is valid, or 0 if
88 ///   the index is not valid.
89 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
90   if (i < path.size())
91     return path[i];
92   return 0;
93 }
94 
/// Check if a path needs to be normalized.
///
/// This is a cheap pre-check performed before the path is handed to the
/// (more expensive) dot-removal routine. Both '/' and '\\' are treated as
/// separators here, regardless of path style. A path is reported as needing
/// normalization if any of the following are true
///  - path starts with '.' (any leading dot, e.g. ".", "./a", "../a")
///  - path contains "/./" or ends with "/."
///  - path contains "/../" or ends with "/.."
///  - path contains two adjacent separators anywhere except at the very
///    start of the string
///  - path ends with a separator, unless that separator is at index 0
/// An empty path never needs normalization.
///
/// \param[in] path
///     A full, partial, or relative path to a file.
///
/// \return
///   Returns \b true if the path needs to be normalized.
bool needsNormalization(const llvm::StringRef &path) {
  if (path.empty())
    return false;
  // We strip off leading "." values so these paths need to be normalized
  if (path[0] == '.')
    return true;
  // Visit every separator in turn and classify it by the characters that
  // follow it.
  for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
       i = path.find_first_of("\\/", i + 1)) {
    // safeCharAtIndex yields 0 past the end, so 0 below means "end of string".
    const auto next = safeCharAtIndex(path, i+1);
    switch (next) {
      case 0:
        // path separator char at the end of the string which should be
        // stripped unless it is the one and only character
        return i > 0;
      case '/':
      case '\\':
        // two path separator chars in the middle of a path needs to be
        // normalized
        if (i > 0)
          return true;
        // A doubled separator at the very start is left alone: step over the
        // second separator so the loop resumes searching after it.
        ++i;
        break;

      case '.': {
          // Separator followed by '.': only a problem when the dot(s) form a
          // whole component ("." or ".."), i.e. are followed by a separator or
          // by the end of the string.
          const auto next_next = safeCharAtIndex(path, i+2);
          switch (next_next) {
            default: break;
            case 0: return true; // ends with "/."
            case '/':
            case '\\':
              return true; // contains "/./"
            case '.': {
              const auto next_next_next = safeCharAtIndex(path, i+3);
              switch (next_next_next) {
                default: break;
                case 0: return true; // ends with "/.."
                case '/':
                case '\\':
                  return true; // contains "/../"
              }
              break;
            }
          }
        }
        break;

      default:
        // Ordinary component character; nothing to normalize here.
        break;
    }
  }
  return false;
}
164 
165 
166 }
167 
168 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
169 
170 // Update the contents of this object with a new path. The path will be split
171 // up into a directory and filename and stored as uniqued string values for
172 // quick comparison and efficient memory usage.
173 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
174   Clear();
175   m_style = (style == Style::native) ? GetNativeStyle() : style;
176 
177   if (pathname.empty())
178     return;
179 
180   llvm::SmallString<128> resolved(pathname);
181 
182   // Normalize the path by removing ".", ".." and other redundant components.
183   if (needsNormalization(resolved))
184     llvm::sys::path::remove_dots(resolved, true, m_style);
185 
186   // Normalize back slashes to forward slashes
187   if (m_style == Style::windows)
188     std::replace(resolved.begin(), resolved.end(), '\\', '/');
189 
190   if (resolved.empty()) {
191     // If we have no path after normalization set the path to the current
192     // directory. This matches what python does and also a few other path
193     // utilities.
194     m_filename.SetString(".");
195     return;
196   }
197 
198   // Split path into filename and directory. We rely on the underlying char
199   // pointer to be nullptr when the components are empty.
200   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
201   if(!filename.empty())
202     m_filename.SetString(filename);
203 
204   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
205   if(!directory.empty())
206     m_directory.SetString(directory);
207 }
208 
209 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
210   return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
211 }
212 
213 // Convert to pointer operator. This allows code to check any FileSpec objects
214 // to see if they contain anything valid using code such as:
215 //
216 //  if (file_spec)
217 //  {}
218 FileSpec::operator bool() const { return m_filename || m_directory; }
219 
220 // Logical NOT operator. This allows code to check any FileSpec objects to see
221 // if they are invalid using code such as:
222 //
223 //  if (!file_spec)
224 //  {}
225 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
226 
227 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
228   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
229   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
230 }
231 
232 bool FileSpec::FileEquals(const FileSpec &rhs) const {
233   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
234   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
235 }
236 
237 // Equal to operator
238 bool FileSpec::operator==(const FileSpec &rhs) const {
239   return FileEquals(rhs) && DirectoryEquals(rhs);
240 }
241 
242 // Not equal to operator
243 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
244 
245 // Less than operator
246 bool FileSpec::operator<(const FileSpec &rhs) const {
247   return FileSpec::Compare(*this, rhs, true) < 0;
248 }
249 
250 // Dump a FileSpec object to a stream
251 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
252   f.Dump(s.AsRawOstream());
253   return s;
254 }
255 
256 // Clear this object by releasing both the directory and filename string values
257 // and making them both the empty string.
258 void FileSpec::Clear() {
259   m_directory.Clear();
260   m_filename.Clear();
261   PathWasModified();
262 }
263 
264 // Compare two FileSpec objects. If "full" is true, then both the directory and
265 // the filename must match. If "full" is false, then the directory names for
266 // "a" and "b" are only compared if they are both non-empty. This allows a
267 // FileSpec object to only contain a filename and it can match FileSpec objects
268 // that have matching filenames with different paths.
269 //
270 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
271 // "a" is greater than "b".
272 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
273   int result = 0;
274 
275   // case sensitivity of compare
276   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
277 
278   // If full is true, then we must compare both the directory and filename.
279 
280   // If full is false, then if either directory is empty, then we match on the
281   // basename only, and if both directories have valid values, we still do a
282   // full compare. This allows for matching when we just have a filename in one
283   // of the FileSpec objects.
284 
285   if (full || (a.m_directory && b.m_directory)) {
286     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
287     if (result)
288       return result;
289   }
290   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
291 }
292 
293 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
294   if (full || (a.GetDirectory() && b.GetDirectory()))
295     return a == b;
296 
297   return a.FileEquals(b);
298 }
299 
300 bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) {
301   if (pattern.GetDirectory())
302     return pattern == file;
303   if (pattern.GetFilename())
304     return pattern.FileEquals(file);
305   return true;
306 }
307 
308 std::optional<FileSpec::Style>
309 FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
310   if (absolute_path.startswith("/"))
311     return Style::posix;
312   if (absolute_path.startswith(R"(\\)"))
313     return Style::windows;
314   if (absolute_path.size() >= 3 && llvm::isAlpha(absolute_path[0]) &&
315       (absolute_path.substr(1, 2) == R"(:\)" ||
316        absolute_path.substr(1, 2) == R"(:/)"))
317     return Style::windows;
318   return std::nullopt;
319 }
320 
321 // Dump the object to the supplied stream. If the object contains a valid
322 // directory name, it will be displayed followed by a directory delimiter, and
323 // the filename.
324 void FileSpec::Dump(llvm::raw_ostream &s) const {
325   std::string path{GetPath(true)};
326   s << path;
327   char path_separator = GetPreferredPathSeparator(m_style);
328   if (!m_filename && !path.empty() && path.back() != path_separator)
329     s << path_separator;
330 }
331 
332 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
333 
334 void FileSpec::SetDirectory(ConstString directory) {
335   m_directory = directory;
336   PathWasModified();
337 }
338 
339 void FileSpec::SetDirectory(llvm::StringRef directory) {
340   m_directory = ConstString(directory);
341   PathWasModified();
342 }
343 
344 void FileSpec::SetFilename(ConstString filename) {
345   m_filename = filename;
346   PathWasModified();
347 }
348 
349 void FileSpec::SetFilename(llvm::StringRef filename) {
350   m_filename = ConstString(filename);
351   PathWasModified();
352 }
353 
354 void FileSpec::ClearFilename() {
355   m_filename.Clear();
356   PathWasModified();
357 }
358 
359 void FileSpec::ClearDirectory() {
360   m_directory.Clear();
361   PathWasModified();
362 }
363 
364 // Extract the directory and path into a fixed buffer. This is needed as the
365 // directory and path are stored in separate string values.
366 size_t FileSpec::GetPath(char *path, size_t path_max_len,
367                          bool denormalize) const {
368   if (!path)
369     return 0;
370 
371   std::string result = GetPath(denormalize);
372   ::snprintf(path, path_max_len, "%s", result.c_str());
373   return std::min(path_max_len - 1, result.length());
374 }
375 
376 std::string FileSpec::GetPath(bool denormalize) const {
377   llvm::SmallString<64> result;
378   GetPath(result, denormalize);
379   return static_cast<std::string>(result);
380 }
381 
382 ConstString FileSpec::GetPathAsConstString(bool denormalize) const {
383   return ConstString{GetPath(denormalize)};
384 }
385 
386 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
387                        bool denormalize) const {
388   path.append(m_directory.GetStringRef().begin(),
389               m_directory.GetStringRef().end());
390   // Since the path was normalized and all paths use '/' when stored in these
391   // objects, we don't need to look for the actual syntax specific path
392   // separator, we just look for and insert '/'.
393   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
394       m_filename.GetStringRef().back() != '/')
395     path.insert(path.end(), '/');
396   path.append(m_filename.GetStringRef().begin(),
397               m_filename.GetStringRef().end());
398   if (denormalize && !path.empty())
399     Denormalize(path, m_style);
400 }
401 
402 ConstString FileSpec::GetFileNameExtension() const {
403   return ConstString(
404       llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
405 }
406 
407 ConstString FileSpec::GetFileNameStrippingExtension() const {
408   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
409 }
410 
411 // Return the size in bytes that this object takes in memory. This returns the
412 // size in bytes of this object, not any shared string values it may refer to.
413 size_t FileSpec::MemorySize() const {
414   return m_filename.MemorySize() + m_directory.MemorySize();
415 }
416 
417 FileSpec
418 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
419   FileSpec ret = *this;
420   ret.AppendPathComponent(component);
421   return ret;
422 }
423 
424 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
425   llvm::SmallString<64> current_path;
426   GetPath(current_path, false);
427   if (llvm::sys::path::has_parent_path(current_path, m_style))
428     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
429                     m_style);
430   return *this;
431 }
432 
433 ConstString FileSpec::GetLastPathComponent() const {
434   llvm::SmallString<64> current_path;
435   GetPath(current_path, false);
436   return ConstString(llvm::sys::path::filename(current_path, m_style));
437 }
438 
439 void FileSpec::PrependPathComponent(llvm::StringRef component) {
440   llvm::SmallString<64> new_path(component);
441   llvm::SmallString<64> current_path;
442   GetPath(current_path, false);
443   llvm::sys::path::append(new_path,
444                           llvm::sys::path::begin(current_path, m_style),
445                           llvm::sys::path::end(current_path), m_style);
446   SetFile(new_path, m_style);
447 }
448 
449 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
450   return PrependPathComponent(new_path.GetPath(false));
451 }
452 
453 void FileSpec::AppendPathComponent(llvm::StringRef component) {
454   llvm::SmallString<64> current_path;
455   GetPath(current_path, false);
456   llvm::sys::path::append(current_path, m_style, component);
457   SetFile(current_path, m_style);
458 }
459 
460 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
461   return AppendPathComponent(new_path.GetPath(false));
462 }
463 
464 bool FileSpec::RemoveLastPathComponent() {
465   llvm::SmallString<64> current_path;
466   GetPath(current_path, false);
467   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
468     SetFile(llvm::sys::path::parent_path(current_path, m_style));
469     return true;
470   }
471   return false;
472 }
473 /// Returns true if the filespec represents an implementation source
474 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
475 /// extension).
476 ///
477 /// \return
478 ///     \b true if the filespec represents an implementation source
479 ///     file, \b false otherwise.
480 bool FileSpec::IsSourceImplementationFile() const {
481   ConstString extension(GetFileNameExtension());
482   if (!extension)
483     return false;
484 
485   static RegularExpression g_source_file_regex(llvm::StringRef(
486       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
487       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
488       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
489       "$"));
490   return g_source_file_regex.Execute(extension.GetStringRef());
491 }
492 
493 bool FileSpec::IsRelative() const {
494   return !IsAbsolute();
495 }
496 
497 bool FileSpec::IsAbsolute() const {
498   // Check if we have cached if this path is absolute to avoid recalculating.
499   if (m_absolute != Absolute::Calculate)
500     return m_absolute == Absolute::Yes;
501 
502   m_absolute = Absolute::No;
503 
504   llvm::SmallString<64> path;
505   GetPath(path, false);
506 
507   if (!path.empty()) {
508     // We consider paths starting with ~ to be absolute.
509     if (path[0] == '~' || llvm::sys::path::is_absolute(path, m_style))
510       m_absolute = Absolute::Yes;
511   }
512 
513   return m_absolute == Absolute::Yes;
514 }
515 
516 void FileSpec::MakeAbsolute(const FileSpec &dir) {
517   if (IsRelative())
518     PrependPathComponent(dir);
519 }
520 
521 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
522                                              raw_ostream &Stream,
523                                              StringRef Style) {
524   assert((Style.empty() || Style.equals_insensitive("F") ||
525           Style.equals_insensitive("D")) &&
526          "Invalid FileSpec style!");
527 
528   StringRef dir = F.GetDirectory().GetStringRef();
529   StringRef file = F.GetFilename().GetStringRef();
530 
531   if (dir.empty() && file.empty()) {
532     Stream << "(empty)";
533     return;
534   }
535 
536   if (Style.equals_insensitive("F")) {
537     Stream << (file.empty() ? "(empty)" : file);
538     return;
539   }
540 
541   // Style is either D or empty, either way we need to print the directory.
542   if (!dir.empty()) {
543     // Directory is stored in normalized form, which might be different than
544     // preferred form.  In order to handle this, we need to cut off the
545     // filename, then denormalize, then write the entire denorm'ed directory.
546     llvm::SmallString<64> denormalized_dir = dir;
547     Denormalize(denormalized_dir, F.GetPathStyle());
548     Stream << denormalized_dir;
549     Stream << GetPreferredPathSeparator(F.GetPathStyle());
550   }
551 
552   if (Style.equals_insensitive("D")) {
553     // We only want to print the directory, so now just exit.
554     if (dir.empty())
555       Stream << "(empty)";
556     return;
557   }
558 
559   if (!file.empty())
560     Stream << file;
561 }
562