//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "URI.h"
10 #include "support/Logger.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/FormatVariadic.h"
16 #include "llvm/Support/Path.h"
17 #include <algorithm>
18 
19 LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
20 
21 namespace clang {
22 namespace clangd {
23 namespace {
24 
isWindowsPath(llvm::StringRef Path)25 bool isWindowsPath(llvm::StringRef Path) {
26   return Path.size() > 1 && llvm::isAlpha(Path[0]) && Path[1] == ':';
27 }
28 
isNetworkPath(llvm::StringRef Path)29 bool isNetworkPath(llvm::StringRef Path) {
30   return Path.size() > 2 && Path[0] == Path[1] &&
31          llvm::sys::path::is_separator(Path[0]);
32 }
33 
34 /// This manages file paths in the file system. All paths in the scheme
35 /// are absolute (with leading '/').
36 /// Note that this scheme is hardcoded into the library and not registered in
37 /// registry.
38 class FileSystemScheme : public URIScheme {
39 public:
40   llvm::Expected<std::string>
getAbsolutePath(llvm::StringRef Authority,llvm::StringRef Body,llvm::StringRef) const41   getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body,
42                   llvm::StringRef /*HintPath*/) const override {
43     if (!Body.startswith("/"))
44       return error("File scheme: expect body to be an absolute path starting "
45                    "with '/': {0}",
46                    Body);
47     llvm::SmallString<128> Path;
48     if (!Authority.empty()) {
49       // Windows UNC paths e.g. file://server/share => \\server\share
50       ("//" + Authority).toVector(Path);
51     } else if (isWindowsPath(Body.substr(1))) {
52       // Windows paths e.g. file:///X:/path => X:\path
53       Body.consume_front("/");
54     }
55     Path.append(Body);
56     llvm::sys::path::native(Path);
57     return std::string(Path);
58   }
59 
60   llvm::Expected<URI>
uriFromAbsolutePath(llvm::StringRef AbsolutePath) const61   uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
62     std::string Body;
63     llvm::StringRef Authority;
64     llvm::StringRef Root = llvm::sys::path::root_name(AbsolutePath);
65     if (isNetworkPath(Root)) {
66       // Windows UNC paths e.g. \\server\share => file://server/share
67       Authority = Root.drop_front(2);
68       AbsolutePath.consume_front(Root);
69     } else if (isWindowsPath(Root)) {
70       // Windows paths e.g. X:\path => file:///X:/path
71       Body = "/";
72     }
73     Body += llvm::sys::path::convert_to_slash(AbsolutePath);
74     return URI("file", Authority, Body);
75   }
76 };
77 
78 llvm::Expected<std::unique_ptr<URIScheme>>
findSchemeByName(llvm::StringRef Scheme)79 findSchemeByName(llvm::StringRef Scheme) {
80   if (Scheme == "file")
81     return std::make_unique<FileSystemScheme>();
82 
83   for (const auto &URIScheme : URISchemeRegistry::entries()) {
84     if (URIScheme.getName() != Scheme)
85       continue;
86     return URIScheme.instantiate();
87   }
88   return error("Can't find scheme: {0}", Scheme);
89 }
90 
shouldEscape(unsigned char C)91 bool shouldEscape(unsigned char C) {
92   // Unreserved characters.
93   if ((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
94       (C >= '0' && C <= '9'))
95     return false;
96   switch (C) {
97   case '-':
98   case '_':
99   case '.':
100   case '~':
101   case '/': // '/' is only reserved when parsing.
102   // ':' is only reserved for relative URI paths, which clangd doesn't produce.
103   case ':':
104     return false;
105   }
106   return true;
107 }
108 
109 /// Encodes a string according to percent-encoding.
110 /// - Unreserved characters are not escaped.
111 /// - Reserved characters always escaped with exceptions like '/'.
112 /// - All other characters are escaped.
percentEncode(llvm::StringRef Content,std::string & Out)113 void percentEncode(llvm::StringRef Content, std::string &Out) {
114   for (unsigned char C : Content)
115     if (shouldEscape(C)) {
116       Out.push_back('%');
117       Out.push_back(llvm::hexdigit(C / 16));
118       Out.push_back(llvm::hexdigit(C % 16));
119     } else {
120       Out.push_back(C);
121     }
122 }
123 
124 /// Decodes a string according to percent-encoding.
percentDecode(llvm::StringRef Content)125 std::string percentDecode(llvm::StringRef Content) {
126   std::string Result;
127   for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
128     if (*I != '%') {
129       Result += *I;
130       continue;
131     }
132     if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
133         llvm::isHexDigit(*(I + 2))) {
134       Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
135       I += 2;
136     } else
137       Result.push_back(*I);
138   }
139   return Result;
140 }
141 
isValidScheme(llvm::StringRef Scheme)142 bool isValidScheme(llvm::StringRef Scheme) {
143   if (Scheme.empty())
144     return false;
145   if (!llvm::isAlpha(Scheme[0]))
146     return false;
147   return std::all_of(Scheme.begin() + 1, Scheme.end(), [](char C) {
148     return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-';
149   });
150 }
151 
152 } // namespace
153 
URI(llvm::StringRef Scheme,llvm::StringRef Authority,llvm::StringRef Body)154 URI::URI(llvm::StringRef Scheme, llvm::StringRef Authority,
155          llvm::StringRef Body)
156     : Scheme(Scheme), Authority(Authority), Body(Body) {
157   assert(!Scheme.empty());
158   assert((Authority.empty() || Body.startswith("/")) &&
159          "URI body must start with '/' when authority is present.");
160 }
161 
toString() const162 std::string URI::toString() const {
163   std::string Result;
164   percentEncode(Scheme, Result);
165   Result.push_back(':');
166   if (Authority.empty() && Body.empty())
167     return Result;
168   // If authority if empty, we only print body if it starts with "/"; otherwise,
169   // the URI is invalid.
170   if (!Authority.empty() || llvm::StringRef(Body).startswith("/"))
171   {
172     Result.append("//");
173     percentEncode(Authority, Result);
174   }
175   percentEncode(Body, Result);
176   return Result;
177 }
178 
parse(llvm::StringRef OrigUri)179 llvm::Expected<URI> URI::parse(llvm::StringRef OrigUri) {
180   URI U;
181   llvm::StringRef Uri = OrigUri;
182 
183   auto Pos = Uri.find(':');
184   if (Pos == llvm::StringRef::npos)
185     return error("Scheme must be provided in URI: {0}", OrigUri);
186   auto SchemeStr = Uri.substr(0, Pos);
187   U.Scheme = percentDecode(SchemeStr);
188   if (!isValidScheme(U.Scheme))
189     return error("Invalid scheme: {0} (decoded: {1})", SchemeStr, U.Scheme);
190   Uri = Uri.substr(Pos + 1);
191   if (Uri.consume_front("//")) {
192     Pos = Uri.find('/');
193     U.Authority = percentDecode(Uri.substr(0, Pos));
194     Uri = Uri.substr(Pos);
195   }
196   U.Body = percentDecode(Uri);
197   return U;
198 }
199 
resolve(llvm::StringRef FileURI,llvm::StringRef HintPath)200 llvm::Expected<std::string> URI::resolve(llvm::StringRef FileURI,
201                                          llvm::StringRef HintPath) {
202   auto Uri = URI::parse(FileURI);
203   if (!Uri)
204     return Uri.takeError();
205   auto Path = URI::resolve(*Uri, HintPath);
206   if (!Path)
207     return Path.takeError();
208   return *Path;
209 }
210 
create(llvm::StringRef AbsolutePath,llvm::StringRef Scheme)211 llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
212                                 llvm::StringRef Scheme) {
213   if (!llvm::sys::path::is_absolute(AbsolutePath))
214     return error("Not a valid absolute path: {0}", AbsolutePath);
215   auto S = findSchemeByName(Scheme);
216   if (!S)
217     return S.takeError();
218   return S->get()->uriFromAbsolutePath(AbsolutePath);
219 }
220 
create(llvm::StringRef AbsolutePath)221 URI URI::create(llvm::StringRef AbsolutePath) {
222   if (!llvm::sys::path::is_absolute(AbsolutePath))
223     llvm_unreachable(
224         ("Not a valid absolute path: " + AbsolutePath).str().c_str());
225   for (auto &Entry : URISchemeRegistry::entries()) {
226     auto URI = Entry.instantiate()->uriFromAbsolutePath(AbsolutePath);
227     // For some paths, conversion to different URI schemes is impossible. These
228     // should be just skipped.
229     if (!URI) {
230       // Ignore the error.
231       llvm::consumeError(URI.takeError());
232       continue;
233     }
234     return std::move(*URI);
235   }
236   // Fallback to file: scheme which should work for any paths.
237   return URI::createFile(AbsolutePath);
238 }
239 
createFile(llvm::StringRef AbsolutePath)240 URI URI::createFile(llvm::StringRef AbsolutePath) {
241   auto U = FileSystemScheme().uriFromAbsolutePath(AbsolutePath);
242   if (!U)
243     llvm_unreachable(llvm::toString(U.takeError()).c_str());
244   return std::move(*U);
245 }
246 
resolve(const URI & Uri,llvm::StringRef HintPath)247 llvm::Expected<std::string> URI::resolve(const URI &Uri,
248                                          llvm::StringRef HintPath) {
249   auto S = findSchemeByName(Uri.Scheme);
250   if (!S)
251     return S.takeError();
252   return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
253 }
254 
resolvePath(llvm::StringRef AbsPath,llvm::StringRef HintPath)255 llvm::Expected<std::string> URI::resolvePath(llvm::StringRef AbsPath,
256                                              llvm::StringRef HintPath) {
257   if (!llvm::sys::path::is_absolute(AbsPath))
258     llvm_unreachable(("Not a valid absolute path: " + AbsPath).str().c_str());
259   for (auto &Entry : URISchemeRegistry::entries()) {
260     auto S = Entry.instantiate();
261     auto U = S->uriFromAbsolutePath(AbsPath);
262     // For some paths, conversion to different URI schemes is impossible. These
263     // should be just skipped.
264     if (!U) {
265       // Ignore the error.
266       llvm::consumeError(U.takeError());
267       continue;
268     }
269     return S->getAbsolutePath(U->Authority, U->Body, HintPath);
270   }
271   // Fallback to file: scheme which doesn't do any canonicalization.
272   return std::string(AbsPath);
273 }
274 
includeSpelling(const URI & Uri)275 llvm::Expected<std::string> URI::includeSpelling(const URI &Uri) {
276   auto S = findSchemeByName(Uri.Scheme);
277   if (!S)
278     return S.takeError();
279   return S->get()->getIncludeSpelling(Uri);
280 }
281 
282 } // namespace clangd
283 } // namespace clang
284