1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef URL_URL_UTIL_H_
6 #define URL_URL_UTIL_H_
7 
8 #include <memory>
9 #include <string>
10 #include <vector>
11 
12 #include "base/component_export.h"
13 #include "base/strings/string16.h"
14 #include "base/strings/string_piece.h"
15 #include "url/third_party/mozilla/url_parse.h"
16 #include "url/url_canon.h"
17 #include "url/url_constants.h"
18 
19 namespace url {
20 
21 // Init ------------------------------------------------------------------------
22 
23 // Used for tests that need to reset schemes. Note that this can only be used
24 // in conjunction with ScopedSchemeRegistryForTests.
25 COMPONENT_EXPORT(URL) void ClearSchemesForTests();
26 
27 class ScopedSchemeRegistryInternal;
28 
29 // Stores the SchemeRegistry upon creation, allowing tests to modify a copy of
30 // it, and restores the original SchemeRegistry when deleted.
COMPONENT_EXPORT(URL)31 class COMPONENT_EXPORT(URL) ScopedSchemeRegistryForTests {
32  public:
33   ScopedSchemeRegistryForTests();
34   ~ScopedSchemeRegistryForTests();
35 
36  private:
37   std::unique_ptr<ScopedSchemeRegistryInternal> internal_;
38 };
39 
40 // Schemes ---------------------------------------------------------------------
41 
42 // Changes the behavior of SchemeHostPort / Origin to allow non-standard schemes
43 // to be specified, instead of canonicalizing them to an invalid SchemeHostPort
44 // or opaque Origin, respectively. This is used for Android WebView backwards
45 // compatibility, which allows the use of custom schemes: content hosted in
46 // Android WebView assumes that one URL with a non-standard scheme will be
47 // same-origin to another URL with the same non-standard scheme.
48 //
49 // Not thread-safe.
50 COMPONENT_EXPORT(URL) void EnableNonStandardSchemesForAndroidWebView();
51 
52 // Whether or not SchemeHostPort and Origin allow non-standard schemes.
53 COMPONENT_EXPORT(URL) bool AllowNonStandardSchemesForAndroidWebView();
54 
// The following Add*Scheme methods are not threadsafe and cannot be called
// concurrently with any other url_util function. They will assert if the lists
// of schemes have been locked (see LockSchemeRegistries), or used.
58 
59 // Adds an application-defined scheme to the internal list of "standard-format"
60 // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
61 // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
62 
63 COMPONENT_EXPORT(URL)
64 void AddStandardScheme(const char* new_scheme, SchemeType scheme_type);
65 
66 // Adds an application-defined scheme to the internal list of schemes allowed
67 // for referrers.
68 COMPONENT_EXPORT(URL)
69 void AddReferrerScheme(const char* new_scheme, SchemeType scheme_type);
70 
71 // Adds an application-defined scheme to the list of schemes that do not trigger
72 // mixed content warnings.
73 COMPONENT_EXPORT(URL) void AddSecureScheme(const char* new_scheme);
74 COMPONENT_EXPORT(URL) const std::vector<std::string>& GetSecureSchemes();
75 
76 // Adds an application-defined scheme to the list of schemes that normal pages
77 // cannot link to or access (i.e., with the same security rules as those applied
78 // to "file" URLs).
79 COMPONENT_EXPORT(URL) void AddLocalScheme(const char* new_scheme);
80 COMPONENT_EXPORT(URL) const std::vector<std::string>& GetLocalSchemes();
81 
82 // Adds an application-defined scheme to the list of schemes that cause pages
83 // loaded with them to not have access to pages loaded with any other URL
84 // scheme.
85 COMPONENT_EXPORT(URL) void AddNoAccessScheme(const char* new_scheme);
86 COMPONENT_EXPORT(URL) const std::vector<std::string>& GetNoAccessSchemes();
87 
88 // Adds an application-defined scheme to the list of schemes that can be sent
89 // CORS requests.
90 COMPONENT_EXPORT(URL) void AddCorsEnabledScheme(const char* new_scheme);
91 COMPONENT_EXPORT(URL) const std::vector<std::string>& GetCorsEnabledSchemes();
92 
93 // Adds an application-defined scheme to the list of web schemes that can be
94 // used by web to store data (e.g. cookies, local storage, ...). This is
95 // to differentiate them from schemes that can store data but are not used on
96 // web (e.g. application's internal schemes) or schemes that are used on web but
97 // cannot store data.
98 COMPONENT_EXPORT(URL) void AddWebStorageScheme(const char* new_scheme);
99 COMPONENT_EXPORT(URL) const std::vector<std::string>& GetWebStorageSchemes();
100 
101 // Adds an application-defined scheme to the list of schemes that can bypass the
102 // Content-Security-Policy (CSP) checks.
103 COMPONENT_EXPORT(URL) void AddCSPBypassingScheme(const char* new_scheme);
104 COMPONENT_EXPORT(URL) const std::vector<std::string>& GetCSPBypassingSchemes();
105 
106 // Adds an application-defined scheme to the list of schemes that are strictly
107 // empty documents, allowing them to commit synchronously.
108 COMPONENT_EXPORT(URL) void AddEmptyDocumentScheme(const char* new_scheme);
109 COMPONENT_EXPORT(URL) const std::vector<std::string>& GetEmptyDocumentSchemes();
110 
111 // Sets a flag to prevent future calls to Add*Scheme from succeeding.
112 //
113 // This is designed to help prevent errors for multithreaded applications.
114 // Normal usage would be to call Add*Scheme for your custom schemes at
115 // the beginning of program initialization, and then LockSchemeRegistries. This
116 // prevents future callers from mistakenly calling Add*Scheme when the
117 // program is running with multiple threads, where such usage would be
118 // dangerous.
119 //
120 // We could have had Add*Scheme use a lock instead, but that would add
121 // some platform-specific dependencies we don't otherwise have now, and is
122 // overkill considering the normal usage is so simple.
123 COMPONENT_EXPORT(URL) void LockSchemeRegistries();
124 
125 // Locates the scheme in the given string and places it into |found_scheme|,
126 // which may be NULL to indicate the caller does not care about the range.
127 //
128 // Returns whether the given |compare| scheme matches the scheme found in the
129 // input (if any). The |compare| scheme must be a valid canonical scheme or
130 // the result of the comparison is undefined.
131 COMPONENT_EXPORT(URL)
132 bool FindAndCompareScheme(const char* str,
133                           int str_len,
134                           const char* compare,
135                           Component* found_scheme);
136 COMPONENT_EXPORT(URL)
137 bool FindAndCompareScheme(const base::char16* str,
138                           int str_len,
139                           const char* compare,
140                           Component* found_scheme);
FindAndCompareScheme(const std::string & str,const char * compare,Component * found_scheme)141 inline bool FindAndCompareScheme(const std::string& str,
142                                  const char* compare,
143                                  Component* found_scheme) {
144   return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
145                               compare, found_scheme);
146 }
FindAndCompareScheme(const base::string16 & str,const char * compare,Component * found_scheme)147 inline bool FindAndCompareScheme(const base::string16& str,
148                                  const char* compare,
149                                  Component* found_scheme) {
150   return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
151                               compare, found_scheme);
152 }
153 
154 // Returns true if the given scheme identified by |scheme| within |spec| is in
155 // the list of known standard-format schemes (see AddStandardScheme).
156 COMPONENT_EXPORT(URL)
157 bool IsStandard(const char* spec, const Component& scheme);
158 COMPONENT_EXPORT(URL)
159 bool IsStandard(const base::char16* spec, const Component& scheme);
160 
161 // Returns true if the given scheme identified by |scheme| within |spec| is in
162 // the list of allowed schemes for referrers (see AddReferrerScheme).
163 COMPONENT_EXPORT(URL)
164 bool IsReferrerScheme(const char* spec, const Component& scheme);
165 
166 // Returns true and sets |type| to the SchemeType of the given scheme
167 // identified by |scheme| within |spec| if the scheme is in the list of known
168 // standard-format schemes (see AddStandardScheme).
169 COMPONENT_EXPORT(URL)
170 bool GetStandardSchemeType(const char* spec,
171                            const Component& scheme,
172                            SchemeType* type);
173 COMPONENT_EXPORT(URL)
174 bool GetStandardSchemeType(const base::char16* spec,
175                            const Component& scheme,
176                            SchemeType* type);
177 
178 // Hosts  ----------------------------------------------------------------------
179 
180 // Returns true if the |canonical_host| matches or is in the same domain as the
181 // given |canonical_domain| string. For example, if the canonicalized hostname
182 // is "www.google.com", this will return true for "com", "google.com", and
183 // "www.google.com" domains.
184 //
185 // If either of the input StringPieces is empty, the return value is false. The
186 // input domain should match host canonicalization rules. i.e. it should be
187 // lowercase except for escape chars.
188 COMPONENT_EXPORT(URL)
189 bool DomainIs(base::StringPiece canonical_host,
190               base::StringPiece canonical_domain);
191 
192 // Returns true if the hostname is an IP address. Note: this function isn't very
193 // cheap, as it must re-parse the host to verify.
194 COMPONENT_EXPORT(URL) bool HostIsIPAddress(base::StringPiece host);
195 
196 // URL library wrappers --------------------------------------------------------
197 
198 // Parses the given spec according to the extracted scheme type. Normal users
199 // should use the URL object, although this may be useful if performance is
200 // critical and you don't want to do the heap allocation for the std::string.
201 //
202 // As with the Canonicalize* functions, the charset converter can
203 // be NULL to use UTF-8 (it will be faster in this case).
204 //
205 // Returns true if a valid URL was produced, false if not. On failure, the
206 // output and parsed structures will still be filled and will be consistent,
207 // but they will not represent a loadable URL.
208 COMPONENT_EXPORT(URL)
209 bool Canonicalize(const char* spec,
210                   int spec_len,
211                   bool trim_path_end,
212                   CharsetConverter* charset_converter,
213                   CanonOutput* output,
214                   Parsed* output_parsed);
215 COMPONENT_EXPORT(URL)
216 bool Canonicalize(const base::char16* spec,
217                   int spec_len,
218                   bool trim_path_end,
219                   CharsetConverter* charset_converter,
220                   CanonOutput* output,
221                   Parsed* output_parsed);
222 
223 // Resolves a potentially relative URL relative to the given parsed base URL.
224 // The base MUST be valid. The resulting canonical URL and parsed information
225 // will be placed in to the given out variables.
226 //
227 // The relative need not be relative. If we discover that it's absolute, this
228 // will produce a canonical version of that URL. See Canonicalize() for more
229 // about the charset_converter.
230 //
231 // Returns true if the output is valid, false if the input could not produce
232 // a valid URL.
233 COMPONENT_EXPORT(URL)
234 bool ResolveRelative(const char* base_spec,
235                      int base_spec_len,
236                      const Parsed& base_parsed,
237                      const char* relative,
238                      int relative_length,
239                      CharsetConverter* charset_converter,
240                      CanonOutput* output,
241                      Parsed* output_parsed);
242 COMPONENT_EXPORT(URL)
243 bool ResolveRelative(const char* base_spec,
244                      int base_spec_len,
245                      const Parsed& base_parsed,
246                      const base::char16* relative,
247                      int relative_length,
248                      CharsetConverter* charset_converter,
249                      CanonOutput* output,
250                      Parsed* output_parsed);
251 
252 // Replaces components in the given VALID input URL. The new canonical URL info
253 // is written to output and out_parsed.
254 //
255 // Returns true if the resulting URL is valid.
256 COMPONENT_EXPORT(URL)
257 bool ReplaceComponents(const char* spec,
258                        int spec_len,
259                        const Parsed& parsed,
260                        const Replacements<char>& replacements,
261                        CharsetConverter* charset_converter,
262                        CanonOutput* output,
263                        Parsed* out_parsed);
264 COMPONENT_EXPORT(URL)
265 bool ReplaceComponents(const char* spec,
266                        int spec_len,
267                        const Parsed& parsed,
268                        const Replacements<base::char16>& replacements,
269                        CharsetConverter* charset_converter,
270                        CanonOutput* output,
271                        Parsed* out_parsed);
272 
273 // String helper functions -----------------------------------------------------
274 
// Selects how DecodeURLEscapeSequences() interprets the unescaped bytes.
enum class DecodeURLMode {
  // UTF-8 decode only. Invalid byte sequences are replaced with U+FFFD.
  kUTF8,
  // Try UTF-8 decoding. If the input contains byte sequences invalid
  // for UTF-8, apply byte to Unicode mapping.
  kUTF8OrIsomorphic,
};
282 
283 // Unescapes the given string using URL escaping rules.
284 COMPONENT_EXPORT(URL)
285 void DecodeURLEscapeSequences(const char* input,
286                               int length,
287                               DecodeURLMode mode,
288                               CanonOutputW* output);
289 
290 // Escapes the given string as defined by the JS method encodeURIComponent. See
291 // https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
292 COMPONENT_EXPORT(URL)
293 void EncodeURIComponent(const char* input, int length, CanonOutput* output);
294 
295 }  // namespace url
296 
297 #endif  // URL_URL_UTIL_H_
298