1 //===- FileMatchTrie.cpp --------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file contains the implementation of a FileMatchTrie.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/Tooling/FileMatchTrie.h"
14 #include "llvm/ADT/StringMap.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Support/FileSystem.h"
17 #include "llvm/Support/Path.h"
18 #include "llvm/Support/raw_ostream.h"
19 #include <string>
20 #include <vector>
21 
22 using namespace clang;
23 using namespace tooling;
24 
25 namespace {
26 
27 /// Default \c PathComparator using \c llvm::sys::fs::equivalent().
28 struct DefaultPathComparator : public PathComparator {
29   bool equivalent(StringRef FileA, StringRef FileB) const override {
30     return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
31   }
32 };
33 
34 } // namespace
35 
36 namespace clang {
37 namespace tooling {
38 
39 /// A node of the \c FileMatchTrie.
40 ///
41 /// Each node has storage for up to one path and a map mapping a path segment to
42 /// child nodes. The trie starts with an empty root node.
43 class FileMatchTrieNode {
44 public:
45   /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
46   /// the number of \c NewPath's trailing characters already consumed during
47   /// recursion.
48   ///
49   /// An insert of a path
50   /// 'p'starts at the root node and does the following:
51   /// - If the node is empty, insert 'p' into its storage and abort.
52   /// - If the node has a path 'p2' but no children, take the last path segment
53   ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
54   ///   'p2' there.
55   /// - Insert a new child for the last segment of 'p' and insert the rest of
56   ///   'p' there.
57   ///
58   /// An insert operation is linear in the number of a path's segments.
59   void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
60     // We cannot put relative paths into the FileMatchTrie as then a path can be
61     // a postfix of another path, violating a core assumption of the trie.
62     if (llvm::sys::path::is_relative(NewPath))
63       return;
64     if (Path.empty()) {
65       // This is an empty leaf. Store NewPath and return.
66       Path = std::string(NewPath);
67       return;
68     }
69     if (Children.empty()) {
70       // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
71       if (NewPath == Path)
72           return;
73       // Make this a node and create a child-leaf with 'Path'.
74       StringRef Element(llvm::sys::path::filename(
75           StringRef(Path).drop_back(ConsumedLength)));
76       Children[Element].Path = Path;
77     }
78     StringRef Element(llvm::sys::path::filename(
79           StringRef(NewPath).drop_back(ConsumedLength)));
80     Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
81   }
82 
83   /// Tries to find the node under this \c FileMatchTrieNode that best
84   /// matches 'FileName'.
85   ///
86   /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
87   /// \c true and an empty string is returned. If no path fits 'FileName', an
88   /// empty string is returned. \c ConsumedLength denotes the number of
89   /// \c Filename's trailing characters already consumed during recursion.
90   ///
91   /// To find the best matching node for a given path 'p', the
92   /// \c findEquivalent() function is called recursively for each path segment
93   /// (back to front) of 'p' until a node 'n' is reached that does not ..
94   /// - .. have children. In this case it is checked
95   ///   whether the stored path is equivalent to 'p'. If yes, the best match is
96   ///   found. Otherwise continue with the parent node as if this node did not
97   ///   exist.
98   /// - .. a child matching the next path segment. In this case, all children of
99   ///   'n' are an equally good match for 'p'. All children are of 'n' are found
100   ///   recursively and their equivalence to 'p' is determined. If none are
101   ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
102   ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
103   ///   error.
104   StringRef findEquivalent(const PathComparator& Comparator,
105                            StringRef FileName,
106                            bool &IsAmbiguous,
107                            unsigned ConsumedLength = 0) const {
108     // Note: we support only directory symlinks for performance reasons.
109     if (Children.empty()) {
110       // As far as we do not support file symlinks, compare
111       // basenames here to avoid request to file system.
112       if (llvm::sys::path::filename(Path) ==
113               llvm::sys::path::filename(FileName) &&
114           Comparator.equivalent(StringRef(Path), FileName))
115         return StringRef(Path);
116       return {};
117     }
118     StringRef Element(llvm::sys::path::filename(FileName.drop_back(
119         ConsumedLength)));
120     llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
121         Children.find(Element);
122     if (MatchingChild != Children.end()) {
123       StringRef Result = MatchingChild->getValue().findEquivalent(
124           Comparator, FileName, IsAmbiguous,
125           ConsumedLength + Element.size() + 1);
126       if (!Result.empty() || IsAmbiguous)
127         return Result;
128     }
129 
130     // If `ConsumedLength` is zero, this is the root and we have no filename
131     // match. Give up in this case, we don't try to find symlinks with
132     // different names.
133     if (ConsumedLength == 0)
134       return {};
135 
136     std::vector<StringRef> AllChildren;
137     getAll(AllChildren, MatchingChild);
138     StringRef Result;
139     for (const auto &Child : AllChildren) {
140       if (Comparator.equivalent(Child, FileName)) {
141         if (Result.empty()) {
142           Result = Child;
143         } else {
144           IsAmbiguous = true;
145           return {};
146         }
147       }
148     }
149     return Result;
150   }
151 
152 private:
153   /// Gets all paths under this FileMatchTrieNode.
154   void getAll(std::vector<StringRef> &Results,
155               llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
156     if (Path.empty())
157       return;
158     if (Children.empty()) {
159       Results.push_back(StringRef(Path));
160       return;
161     }
162     for (llvm::StringMap<FileMatchTrieNode>::const_iterator
163          It = Children.begin(), E = Children.end();
164          It != E; ++It) {
165       if (It == Except)
166         continue;
167       It->getValue().getAll(Results, Children.end());
168     }
169   }
170 
171   // The stored absolute path in this node. Only valid for leaf nodes, i.e.
172   // nodes where Children.empty().
173   std::string Path;
174 
175   // The children of this node stored in a map based on the next path segment.
176   llvm::StringMap<FileMatchTrieNode> Children;
177 };
178 
179 } // namespace tooling
180 } // namespace clang
181 
182 FileMatchTrie::FileMatchTrie()
183     : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
184 
185 FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
186     : Root(new FileMatchTrieNode), Comparator(Comparator) {}
187 
188 FileMatchTrie::~FileMatchTrie() {
189   delete Root;
190 }
191 
192 void FileMatchTrie::insert(StringRef NewPath) {
193   Root->insert(NewPath);
194 }
195 
196 StringRef FileMatchTrie::findEquivalent(StringRef FileName,
197                                         raw_ostream &Error) const {
198   if (llvm::sys::path::is_relative(FileName)) {
199     Error << "Cannot resolve relative paths";
200     return {};
201   }
202   bool IsAmbiguous = false;
203   StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
204   if (IsAmbiguous)
205     Error << "Path is ambiguous";
206   return Result;
207 }
208