//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "URI.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/Twine.h"
12 #include "llvm/Support/Error.h"
13 #include "llvm/Support/Format.h"
14 #include "llvm/Support/FormatVariadic.h"
15 #include "llvm/Support/Path.h"
16 #include <algorithm>
17 
18 LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
19 
20 namespace clang {
21 namespace clangd {
22 namespace {
23 
make_string_error(const llvm::Twine & Message)24 inline llvm::Error make_string_error(const llvm::Twine &Message) {
25   return llvm::make_error<llvm::StringError>(Message,
26                                              llvm::inconvertibleErrorCode());
27 }
28 
29 /// This manages file paths in the file system. All paths in the scheme
30 /// are absolute (with leading '/').
31 /// Note that this scheme is hardcoded into the library and not registered in
32 /// registry.
33 class FileSystemScheme : public URIScheme {
34 public:
35   llvm::Expected<std::string>
getAbsolutePath(llvm::StringRef,llvm::StringRef Body,llvm::StringRef) const36   getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
37                   llvm::StringRef /*HintPath*/) const override {
38     if (!Body.startswith("/"))
39       return make_string_error("File scheme: expect body to be an absolute "
40                                "path starting with '/': " +
41                                Body);
42     // For Windows paths e.g. /X:
43     if (Body.size() > 2 && Body[0] == '/' && Body[2] == ':')
44       Body.consume_front("/");
45     llvm::SmallVector<char, 16> Path(Body.begin(), Body.end());
46     llvm::sys::path::native(Path);
47     return std::string(Path.begin(), Path.end());
48   }
49 
50   llvm::Expected<URI>
uriFromAbsolutePath(llvm::StringRef AbsolutePath) const51   uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
52     std::string Body;
53     // For Windows paths e.g. X:
54     if (AbsolutePath.size() > 1 && AbsolutePath[1] == ':')
55       Body = "/";
56     Body += llvm::sys::path::convert_to_slash(AbsolutePath);
57     return URI("file", /*Authority=*/"", Body);
58   }
59 };
60 
61 llvm::Expected<std::unique_ptr<URIScheme>>
findSchemeByName(llvm::StringRef Scheme)62 findSchemeByName(llvm::StringRef Scheme) {
63   if (Scheme == "file")
64     return std::make_unique<FileSystemScheme>();
65 
66   for (auto I = URISchemeRegistry::begin(), E = URISchemeRegistry::end();
67        I != E; ++I) {
68     if (I->getName() != Scheme)
69       continue;
70     return I->instantiate();
71   }
72   return make_string_error("Can't find scheme: " + Scheme);
73 }
74 
/// Returns true if C has to be percent-encoded when a URI is printed.
/// ASCII letters, ASCII digits and the characters - _ . ~ / : are left as-is;
/// every other byte value is escaped.
bool shouldEscape(unsigned char C) {
  // Unreserved alphanumerics.
  const bool IsLowerCase = C >= 'a' && C <= 'z';
  const bool IsUpperCase = C >= 'A' && C <= 'Z';
  const bool IsDigit = C >= '0' && C <= '9';
  if (IsLowerCase || IsUpperCase || IsDigit)
    return false;

  // Unreserved punctuation.
  if (C == '-' || C == '_' || C == '.' || C == '~')
    return false;

  // '/' is only reserved when parsing.
  // ':' is only reserved for relative URI paths, which clangd doesn't produce.
  if (C == '/' || C == ':')
    return false;

  return true;
}
92 
93 /// Encodes a string according to percent-encoding.
94 /// - Unreserved characters are not escaped.
95 /// - Reserved characters always escaped with exceptions like '/'.
96 /// - All other characters are escaped.
percentEncode(llvm::StringRef Content,std::string & Out)97 void percentEncode(llvm::StringRef Content, std::string &Out) {
98   std::string Result;
99   for (unsigned char C : Content)
100     if (shouldEscape(C))
101     {
102       Out.push_back('%');
103       Out.push_back(llvm::hexdigit(C / 16));
104       Out.push_back(llvm::hexdigit(C % 16));
105     } else
106     { Out.push_back(C); }
107 }
108 
109 /// Decodes a string according to percent-encoding.
percentDecode(llvm::StringRef Content)110 std::string percentDecode(llvm::StringRef Content) {
111   std::string Result;
112   for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
113     if (*I != '%') {
114       Result += *I;
115       continue;
116     }
117     if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
118         llvm::isHexDigit(*(I + 2))) {
119       Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
120       I += 2;
121     } else
122       Result.push_back(*I);
123   }
124   return Result;
125 }
126 
isValidScheme(llvm::StringRef Scheme)127 bool isValidScheme(llvm::StringRef Scheme) {
128   if (Scheme.empty())
129     return false;
130   if (!llvm::isAlpha(Scheme[0]))
131     return false;
132   return std::all_of(Scheme.begin() + 1, Scheme.end(), [](char C) {
133     return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-';
134   });
135 }
136 
137 } // namespace
138 
URI(llvm::StringRef Scheme,llvm::StringRef Authority,llvm::StringRef Body)139 URI::URI(llvm::StringRef Scheme, llvm::StringRef Authority,
140          llvm::StringRef Body)
141     : Scheme(Scheme), Authority(Authority), Body(Body) {
142   assert(!Scheme.empty());
143   assert((Authority.empty() || Body.startswith("/")) &&
144          "URI body must start with '/' when authority is present.");
145 }
146 
toString() const147 std::string URI::toString() const {
148   std::string Result;
149   percentEncode(Scheme, Result);
150   Result.push_back(':');
151   if (Authority.empty() && Body.empty())
152     return Result;
153   // If authority if empty, we only print body if it starts with "/"; otherwise,
154   // the URI is invalid.
155   if (!Authority.empty() || llvm::StringRef(Body).startswith("/"))
156   {
157     Result.append("//");
158     percentEncode(Authority, Result);
159   }
160   percentEncode(Body, Result);
161   return Result;
162 }
163 
parse(llvm::StringRef OrigUri)164 llvm::Expected<URI> URI::parse(llvm::StringRef OrigUri) {
165   URI U;
166   llvm::StringRef Uri = OrigUri;
167 
168   auto Pos = Uri.find(':');
169   if (Pos == llvm::StringRef::npos)
170     return make_string_error("Scheme must be provided in URI: " + OrigUri);
171   auto SchemeStr = Uri.substr(0, Pos);
172   U.Scheme = percentDecode(SchemeStr);
173   if (!isValidScheme(U.Scheme))
174     return make_string_error(llvm::formatv("Invalid scheme: {0} (decoded: {1})",
175                                            SchemeStr, U.Scheme));
176   Uri = Uri.substr(Pos + 1);
177   if (Uri.consume_front("//")) {
178     Pos = Uri.find('/');
179     U.Authority = percentDecode(Uri.substr(0, Pos));
180     Uri = Uri.substr(Pos);
181   }
182   U.Body = percentDecode(Uri);
183   return U;
184 }
185 
resolve(llvm::StringRef FileURI,llvm::StringRef HintPath)186 llvm::Expected<std::string> URI::resolve(llvm::StringRef FileURI,
187                                          llvm::StringRef HintPath) {
188   auto Uri = URI::parse(FileURI);
189   if (!Uri)
190     return Uri.takeError();
191   auto Path = URI::resolve(*Uri, HintPath);
192   if (!Path)
193     return Path.takeError();
194   return *Path;
195 }
196 
create(llvm::StringRef AbsolutePath,llvm::StringRef Scheme)197 llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
198                                 llvm::StringRef Scheme) {
199   if (!llvm::sys::path::is_absolute(AbsolutePath))
200     return make_string_error("Not a valid absolute path: " + AbsolutePath);
201   auto S = findSchemeByName(Scheme);
202   if (!S)
203     return S.takeError();
204   return S->get()->uriFromAbsolutePath(AbsolutePath);
205 }
206 
create(llvm::StringRef AbsolutePath)207 URI URI::create(llvm::StringRef AbsolutePath) {
208   if (!llvm::sys::path::is_absolute(AbsolutePath))
209     llvm_unreachable(
210         ("Not a valid absolute path: " + AbsolutePath).str().c_str());
211   for (auto &Entry : URISchemeRegistry::entries()) {
212     auto URI = Entry.instantiate()->uriFromAbsolutePath(AbsolutePath);
213     // For some paths, conversion to different URI schemes is impossible. These
214     // should be just skipped.
215     if (!URI) {
216       // Ignore the error.
217       llvm::consumeError(URI.takeError());
218       continue;
219     }
220     return std::move(*URI);
221   }
222   // Fallback to file: scheme which should work for any paths.
223   return URI::createFile(AbsolutePath);
224 }
225 
createFile(llvm::StringRef AbsolutePath)226 URI URI::createFile(llvm::StringRef AbsolutePath) {
227   auto U = FileSystemScheme().uriFromAbsolutePath(AbsolutePath);
228   if (!U)
229     llvm_unreachable(llvm::toString(U.takeError()).c_str());
230   return std::move(*U);
231 }
232 
resolve(const URI & Uri,llvm::StringRef HintPath)233 llvm::Expected<std::string> URI::resolve(const URI &Uri,
234                                          llvm::StringRef HintPath) {
235   auto S = findSchemeByName(Uri.Scheme);
236   if (!S)
237     return S.takeError();
238   return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
239 }
240 
resolvePath(llvm::StringRef AbsPath,llvm::StringRef HintPath)241 llvm::Expected<std::string> URI::resolvePath(llvm::StringRef AbsPath,
242                                              llvm::StringRef HintPath) {
243   if (!llvm::sys::path::is_absolute(AbsPath))
244     llvm_unreachable(("Not a valid absolute path: " + AbsPath).str().c_str());
245   for (auto &Entry : URISchemeRegistry::entries()) {
246     auto S = Entry.instantiate();
247     auto U = S->uriFromAbsolutePath(AbsPath);
248     // For some paths, conversion to different URI schemes is impossible. These
249     // should be just skipped.
250     if (!U) {
251       // Ignore the error.
252       llvm::consumeError(U.takeError());
253       continue;
254     }
255     return S->getAbsolutePath(U->Authority, U->Body, HintPath);
256   }
257   // Fallback to file: scheme which doesn't do any canonicalization.
258   return AbsPath;
259 }
260 
includeSpelling(const URI & Uri)261 llvm::Expected<std::string> URI::includeSpelling(const URI &Uri) {
262   auto S = findSchemeByName(Uri.Scheme);
263   if (!S)
264     return S.takeError();
265   return S->get()->getIncludeSpelling(Uri);
266 }
267 
268 } // namespace clangd
269 } // namespace clang
270