1 //===-- FileSpec.cpp --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #include <algorithm>
24 #include <system_error>
25 #include <vector>
26 
27 #include <assert.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 using namespace lldb;
33 using namespace lldb_private;
34 
35 namespace {
36 
37 static constexpr FileSpec::Style GetNativeStyle() {
38 #if defined(_WIN32)
39   return FileSpec::Style::windows;
40 #else
41   return FileSpec::Style::posix;
42 #endif
43 }
44 
45 bool PathStyleIsPosix(FileSpec::Style style) {
46   return (style == FileSpec::Style::posix ||
47           (style == FileSpec::Style::native &&
48            GetNativeStyle() == FileSpec::Style::posix));
49 }
50 
51 const char *GetPathSeparators(FileSpec::Style style) {
52   return llvm::sys::path::get_separator(style).data();
53 }
54 
55 char GetPreferredPathSeparator(FileSpec::Style style) {
56   return GetPathSeparators(style)[0];
57 }
58 
59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60   if (PathStyleIsPosix(style))
61     return;
62 
63   std::replace(path.begin(), path.end(), '/', '\\');
64 }
65 
66 } // end anonymous namespace
67 
68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69 
70 // Default constructor that can take an optional full path to a file on disk.
71 FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
72   SetFile(path, style);
73 }
74 
75 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &Triple)
76     : FileSpec{path, Triple.isOSWindows() ? Style::windows : Style::posix} {}
77 
78 // Copy constructor
79 FileSpec::FileSpec(const FileSpec *rhs) : m_directory(), m_filename() {
80   if (rhs)
81     *this = *rhs;
82 }
83 
84 // Virtual destructor in case anyone inherits from this class.
85 FileSpec::~FileSpec() {}
86 
87 namespace {
88 /// Safely get a character at the specified index.
89 ///
90 /// \param[in] path
91 ///     A full, partial, or relative path to a file.
92 ///
93 /// \param[in] i
94 ///     An index into path which may or may not be valid.
95 ///
96 /// \return
97 ///   The character at index \a i if the index is valid, or 0 if
98 ///   the index is not valid.
99 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
100   if (i < path.size())
101     return path[i];
102   return 0;
103 }
104 
105 /// Check if a path needs to be normalized.
106 ///
107 /// Check if a path needs to be normalized. We currently consider a
108 /// path to need normalization if any of the following are true
109 ///  - path contains "/./"
110 ///  - path contains "/../"
111 ///  - path contains "//"
112 ///  - path ends with "/"
113 /// Paths that start with "./" or with "../" are not considered to
114 /// need normalization since we aren't trying to resolve the path,
115 /// we are just trying to remove redundant things from the path.
116 ///
117 /// \param[in] path
118 ///     A full, partial, or relative path to a file.
119 ///
120 /// \return
121 ///   Returns \b true if the path needs to be normalized.
122 bool needsNormalization(const llvm::StringRef &path) {
123   if (path.empty())
124     return false;
125   // We strip off leading "." values so these paths need to be normalized
126   if (path[0] == '.')
127     return true;
128   for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
129        i = path.find_first_of("\\/", i + 1)) {
130     const auto next = safeCharAtIndex(path, i+1);
131     switch (next) {
132       case 0:
133         // path separator char at the end of the string which should be
134         // stripped unless it is the one and only character
135         return i > 0;
136       case '/':
137       case '\\':
138         // two path separator chars in the middle of a path needs to be
139         // normalized
140         if (i > 0)
141           return true;
142         ++i;
143         break;
144 
145       case '.': {
146           const auto next_next = safeCharAtIndex(path, i+2);
147           switch (next_next) {
148             default: break;
149             case 0: return true; // ends with "/."
150             case '/':
151             case '\\':
152               return true; // contains "/./"
153             case '.': {
154               const auto next_next_next = safeCharAtIndex(path, i+3);
155               switch (next_next_next) {
156                 default: break;
157                 case 0: return true; // ends with "/.."
158                 case '/':
159                 case '\\':
160                   return true; // contains "/../"
161               }
162               break;
163             }
164           }
165         }
166         break;
167 
168       default:
169         break;
170     }
171   }
172   return false;
173 }
174 
175 
176 }
177 // Assignment operator.
178 const FileSpec &FileSpec::operator=(const FileSpec &rhs) {
179   if (this != &rhs) {
180     m_directory = rhs.m_directory;
181     m_filename = rhs.m_filename;
182     m_is_resolved = rhs.m_is_resolved;
183     m_style = rhs.m_style;
184   }
185   return *this;
186 }
187 
188 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
189 
190 // Update the contents of this object with a new path. The path will be split
191 // up into a directory and filename and stored as uniqued string values for
192 // quick comparison and efficient memory usage.
193 void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
194   m_filename.Clear();
195   m_directory.Clear();
196   m_is_resolved = false;
197   m_style = (style == Style::native) ? GetNativeStyle() : style;
198 
199   if (pathname.empty())
200     return;
201 
202   llvm::SmallString<128> resolved(pathname);
203 
204   // Normalize the path by removing ".", ".." and other redundant components.
205   if (needsNormalization(resolved))
206     llvm::sys::path::remove_dots(resolved, true, m_style);
207 
208   // Normalize back slashes to forward slashes
209   if (m_style == Style::windows)
210     std::replace(resolved.begin(), resolved.end(), '\\', '/');
211 
212   if (resolved.empty()) {
213     // If we have no path after normalization set the path to the current
214     // directory. This matches what python does and also a few other path
215     // utilities.
216     m_filename.SetString(".");
217     return;
218   }
219 
220   // Split path into filename and directory. We rely on the underlying char
221   // pointer to be nullptr when the components are empty.
222   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
223   if(!filename.empty())
224     m_filename.SetString(filename);
225 
226   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
227   if(!directory.empty())
228     m_directory.SetString(directory);
229 }
230 
231 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &Triple) {
232   return SetFile(path, Triple.isOSWindows() ? Style::windows : Style::posix);
233 }
234 
235 // Convert to pointer operator. This allows code to check any FileSpec objects
236 // to see if they contain anything valid using code such as:
237 //
238 //  if (file_spec)
239 //  {}
240 FileSpec::operator bool() const { return m_filename || m_directory; }
241 
242 // Logical NOT operator. This allows code to check any FileSpec objects to see
243 // if they are invalid using code such as:
244 //
245 //  if (!file_spec)
246 //  {}
247 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
248 
249 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
250   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
251   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
252 }
253 
254 bool FileSpec::FileEquals(const FileSpec &rhs) const {
255   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
256   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
257 }
258 
259 // Equal to operator
260 bool FileSpec::operator==(const FileSpec &rhs) const {
261   return FileEquals(rhs) && DirectoryEquals(rhs);
262 }
263 
264 // Not equal to operator
265 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
266 
267 // Less than operator
268 bool FileSpec::operator<(const FileSpec &rhs) const {
269   return FileSpec::Compare(*this, rhs, true) < 0;
270 }
271 
272 // Dump a FileSpec object to a stream
273 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
274   f.Dump(&s);
275   return s;
276 }
277 
278 // Clear this object by releasing both the directory and filename string values
279 // and making them both the empty string.
280 void FileSpec::Clear() {
281   m_directory.Clear();
282   m_filename.Clear();
283 }
284 
285 // Compare two FileSpec objects. If "full" is true, then both the directory and
286 // the filename must match. If "full" is false, then the directory names for
287 // "a" and "b" are only compared if they are both non-empty. This allows a
288 // FileSpec object to only contain a filename and it can match FileSpec objects
289 // that have matching filenames with different paths.
290 //
291 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
292 // "a" is greater than "b".
293 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
294   int result = 0;
295 
296   // case sensitivity of compare
297   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
298 
299   // If full is true, then we must compare both the directory and filename.
300 
301   // If full is false, then if either directory is empty, then we match on the
302   // basename only, and if both directories have valid values, we still do a
303   // full compare. This allows for matching when we just have a filename in one
304   // of the FileSpec objects.
305 
306   if (full || (a.m_directory && b.m_directory)) {
307     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
308     if (result)
309       return result;
310   }
311   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
312 }
313 
314 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
315   // case sensitivity of equality test
316   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
317 
318   const bool filenames_equal = ConstString::Equals(a.m_filename,
319                                                    b.m_filename,
320                                                    case_sensitive);
321 
322   if (!filenames_equal)
323     return false;
324 
325   if (!full && (a.GetDirectory().IsEmpty() || b.GetDirectory().IsEmpty()))
326     return filenames_equal;
327 
328   return a == b;
329 }
330 
331 llvm::Optional<FileSpec::Style> FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
332   if (absolute_path.startswith("/"))
333     return Style::posix;
334   if (absolute_path.startswith(R"(\\)"))
335     return Style::windows;
336   if (absolute_path.size() > 3 && llvm::isAlpha(absolute_path[0]) &&
337       absolute_path.substr(1, 2) == R"(:\)")
338     return Style::windows;
339   return llvm::None;
340 }
341 
342 // Dump the object to the supplied stream. If the object contains a valid
343 // directory name, it will be displayed followed by a directory delimiter, and
344 // the filename.
345 void FileSpec::Dump(Stream *s) const {
346   if (s) {
347     std::string path{GetPath(true)};
348     s->PutCString(path);
349     char path_separator = GetPreferredPathSeparator(m_style);
350     if (!m_filename && !path.empty() && path.back() != path_separator)
351       s->PutChar(path_separator);
352   }
353 }
354 
355 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
356 
357 // Directory string get accessor.
358 ConstString &FileSpec::GetDirectory() { return m_directory; }
359 
360 // Directory string const get accessor.
361 ConstString FileSpec::GetDirectory() const { return m_directory; }
362 
363 // Filename string get accessor.
364 ConstString &FileSpec::GetFilename() { return m_filename; }
365 
366 // Filename string const get accessor.
367 ConstString FileSpec::GetFilename() const { return m_filename; }
368 
369 // Extract the directory and path into a fixed buffer. This is needed as the
370 // directory and path are stored in separate string values.
371 size_t FileSpec::GetPath(char *path, size_t path_max_len,
372                          bool denormalize) const {
373   if (!path)
374     return 0;
375 
376   std::string result = GetPath(denormalize);
377   ::snprintf(path, path_max_len, "%s", result.c_str());
378   return std::min(path_max_len - 1, result.length());
379 }
380 
381 std::string FileSpec::GetPath(bool denormalize) const {
382   llvm::SmallString<64> result;
383   GetPath(result, denormalize);
384   return std::string(result.begin(), result.end());
385 }
386 
387 const char *FileSpec::GetCString(bool denormalize) const {
388   return ConstString{GetPath(denormalize)}.AsCString(nullptr);
389 }
390 
391 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
392                        bool denormalize) const {
393   path.append(m_directory.GetStringRef().begin(),
394               m_directory.GetStringRef().end());
395   // Since the path was normalized and all paths use '/' when stored in these
396   // objects, we don't need to look for the actual syntax specific path
397   // separator, we just look for and insert '/'.
398   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
399       m_filename.GetStringRef().back() != '/')
400     path.insert(path.end(), '/');
401   path.append(m_filename.GetStringRef().begin(),
402               m_filename.GetStringRef().end());
403   if (denormalize && !path.empty())
404     Denormalize(path, m_style);
405 }
406 
407 ConstString FileSpec::GetFileNameExtension() const {
408   return ConstString(
409       llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
410 }
411 
412 ConstString FileSpec::GetFileNameStrippingExtension() const {
413   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
414 }
415 
416 // Return the size in bytes that this object takes in memory. This returns the
417 // size in bytes of this object, not any shared string values it may refer to.
418 size_t FileSpec::MemorySize() const {
419   return m_filename.MemorySize() + m_directory.MemorySize();
420 }
421 
422 FileSpec
423 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
424   FileSpec ret = *this;
425   ret.AppendPathComponent(component);
426   return ret;
427 }
428 
429 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
430   llvm::SmallString<64> current_path;
431   GetPath(current_path, false);
432   if (llvm::sys::path::has_parent_path(current_path, m_style))
433     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
434                     m_style);
435   return *this;
436 }
437 
438 ConstString FileSpec::GetLastPathComponent() const {
439   llvm::SmallString<64> current_path;
440   GetPath(current_path, false);
441   return ConstString(llvm::sys::path::filename(current_path, m_style));
442 }
443 
444 void FileSpec::PrependPathComponent(llvm::StringRef component) {
445   llvm::SmallString<64> new_path(component);
446   llvm::SmallString<64> current_path;
447   GetPath(current_path, false);
448   llvm::sys::path::append(new_path,
449                           llvm::sys::path::begin(current_path, m_style),
450                           llvm::sys::path::end(current_path), m_style);
451   SetFile(new_path, m_style);
452 }
453 
454 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
455   return PrependPathComponent(new_path.GetPath(false));
456 }
457 
458 void FileSpec::AppendPathComponent(llvm::StringRef component) {
459   llvm::SmallString<64> current_path;
460   GetPath(current_path, false);
461   llvm::sys::path::append(current_path, m_style, component);
462   SetFile(current_path, m_style);
463 }
464 
465 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
466   return AppendPathComponent(new_path.GetPath(false));
467 }
468 
469 bool FileSpec::RemoveLastPathComponent() {
470   llvm::SmallString<64> current_path;
471   GetPath(current_path, false);
472   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
473     SetFile(llvm::sys::path::parent_path(current_path, m_style));
474     return true;
475   }
476   return false;
477 }
478 /// Returns true if the filespec represents an implementation source
479 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
480 /// extension).
481 ///
482 /// \return
483 ///     \b true if the filespec represents an implementation source
484 ///     file, \b false otherwise.
485 bool FileSpec::IsSourceImplementationFile() const {
486   ConstString extension(GetFileNameExtension());
487   if (!extension)
488     return false;
489 
490   static RegularExpression g_source_file_regex(llvm::StringRef(
491       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
492       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
493       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
494       "$"));
495   return g_source_file_regex.Execute(extension.GetStringRef());
496 }
497 
498 bool FileSpec::IsRelative() const {
499   return !IsAbsolute();
500 }
501 
502 bool FileSpec::IsAbsolute() const {
503   llvm::SmallString<64> current_path;
504   GetPath(current_path, false);
505 
506   // Early return if the path is empty.
507   if (current_path.empty())
508     return false;
509 
510   // We consider paths starting with ~ to be absolute.
511   if (current_path[0] == '~')
512     return true;
513 
514   return llvm::sys::path::is_absolute(current_path, m_style);
515 }
516 
517 void FileSpec::MakeAbsolute(const FileSpec &dir) {
518   if (IsRelative())
519     PrependPathComponent(dir);
520 }
521 
522 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
523                                              raw_ostream &Stream,
524                                              StringRef Style) {
525   assert(
526       (Style.empty() || Style.equals_lower("F") || Style.equals_lower("D")) &&
527       "Invalid FileSpec style!");
528 
529   StringRef dir = F.GetDirectory().GetStringRef();
530   StringRef file = F.GetFilename().GetStringRef();
531 
532   if (dir.empty() && file.empty()) {
533     Stream << "(empty)";
534     return;
535   }
536 
537   if (Style.equals_lower("F")) {
538     Stream << (file.empty() ? "(empty)" : file);
539     return;
540   }
541 
542   // Style is either D or empty, either way we need to print the directory.
543   if (!dir.empty()) {
544     // Directory is stored in normalized form, which might be different than
545     // preferred form.  In order to handle this, we need to cut off the
546     // filename, then denormalize, then write the entire denorm'ed directory.
547     llvm::SmallString<64> denormalized_dir = dir;
548     Denormalize(denormalized_dir, F.GetPathStyle());
549     Stream << denormalized_dir;
550     Stream << GetPreferredPathSeparator(F.GetPathStyle());
551   }
552 
553   if (Style.equals_lower("D")) {
554     // We only want to print the directory, so now just exit.
555     if (dir.empty())
556       Stream << "(empty)";
557     return;
558   }
559 
560   if (!file.empty())
561     Stream << file;
562 }
563