1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6 #ifndef nsURLHelper_h__
7 #define nsURLHelper_h__
8
9 #include "nsString.h"
10 #include "nsTArray.h"
11 #include "nsASCIIMask.h"
12
// Forward declarations: this header only refers to these interfaces through
// pointers, so the full definitions are not needed here.
class nsIFile;
class nsIURLParser;
15
/**
 * Bit flags accepted by net_CoalesceDirs. Each non-zero value occupies its
 * own bit.
 */
enum netCoalesceFlags {
  /** No special handling requested. */
  NET_COALESCE_NORMAL = 0,

  /**
   * retains /../ that reach above dir root (useful for FTP
   * servers in which the root of the FTP URL is not necessarily
   * the root of the FTP filesystem).
   */
  NET_COALESCE_ALLOW_RELATIVE_ROOT = 1 << 0,

  /**
   * recognizes /%2F and // as markers for the root directory
   * and handles them properly.
   */
  NET_COALESCE_DOUBLE_SLASH_IS_ROOT = 1 << 1
};
32
33 //----------------------------------------------------------------------------
34 // This module contains some private helper functions related to URL parsing.
35 //----------------------------------------------------------------------------
36
/* Shutdown: frees the URL parser. */
void net_ShutdownURLHelper();
#ifdef XP_MACOSX
/* Additional shutdown entry point, declared only when XP_MACOSX is defined. */
void net_ShutdownURLHelperOSX();
#endif
42
43 /* access URL parsers */
44 nsIURLParser* net_GetAuthURLParser();
45 nsIURLParser* net_GetNoAuthURLParser();
46 nsIURLParser* net_GetStdURLParser();
47
48 /* convert between nsIFile and file:// URL spec
49 * net_GetURLSpecFromFile does an extra stat, so callers should
50 * avoid it if possible in favor of net_GetURLSpecFromActualFile
51 * and net_GetURLSpecFromDir */
52 nsresult net_GetURLSpecFromFile(nsIFile*, nsACString&);
53 nsresult net_GetURLSpecFromDir(nsIFile*, nsACString&);
54 nsresult net_GetURLSpecFromActualFile(nsIFile*, nsACString&);
55 nsresult net_GetFileFromURLSpec(const nsACString&, nsIFile**);
56
57 /* extract file path components from file:// URL */
58 nsresult net_ParseFileURL(const nsACString& inURL, nsACString& outDirectory,
59 nsACString& outFileBaseName,
60 nsACString& outFileExtension);
61
62 /* handle .. in dirs while resolving URLs (path is UTF-8) */
63 void net_CoalesceDirs(netCoalesceFlags flags, char* path);
64
65 /**
66 * Check if a URL is absolute
67 *
68 * @param inURL URL spec
69 * @return true if the given spec represents an absolute URL
70 */
71 bool net_IsAbsoluteURL(const nsACString& uri);
72
73 /**
74 * Extract URI-Scheme if possible
75 *
76 * @param inURI URI spec
77 * @param scheme scheme copied to this buffer on return. Is lowercase.
78 */
79 nsresult net_ExtractURLScheme(const nsACString& inURI, nsACString& scheme);
80
81 /* check that the given scheme conforms to RFC 2396 */
82 bool net_IsValidScheme(const nsACString& scheme);
83
84 /**
85 * This function strips out all C0 controls and space at the beginning and end
86 * of the URL and filters out \r, \n, \t from the middle of the URL. This makes
87 * it safe to call on things like javascript: urls or data: urls, where we may
88 * in fact run into whitespace that is not properly encoded.
89 *
90 * @param input the URL spec we want to filter
91 * @param result the out param to write to if filtering happens
92 */
93 void net_FilterURIString(const nsACString& input, nsACString& result);
94
95 /**
96 * This function performs character stripping just like net_FilterURIString,
97 * with the added benefit of also performing percent escaping of dissallowed
98 * characters, all in one pass. Saving one pass is very important when operating
99 * on really large strings.
100 *
101 * @param aInput the URL spec we want to filter
102 * @param aFlags the flags which control which characters we escape
103 * @param aFilterMask a mask of characters that should excluded from the result
104 * @param aResult the out param to write to if filtering happens
105 */
106 nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags,
107 const ASCIIMaskArray& aFilterMask,
108 nsACString& aResult);
109
#if defined(XP_WIN)
/**
 * On Win32 and OS/2 systems a back-slash in a file:// URL is equivalent to a
 * forward-slash.  This function maps any back-slashes to forward-slashes.
 *
 * @param aURL
 *        The URL string to normalize (UTF-8 encoded).  This can be a
 *        relative URL segment.
 * @param aResultBuf
 *        The resulting string is appended to this string.  If the input URL
 *        is already normalized, then aResultBuf is unchanged.
 *
 * @returns false if aURL is already normalized.  Otherwise, returns true.
 */
bool net_NormalizeFileURL(const nsACString& aURL, nsCString& aResultBuf);
#endif
126
127 /*****************************************************************************
128 * generic string routines follow (XXX move to someplace more generic).
129 */
130
/* Convert to lower case. Both overloads take a mutable buffer and return
 * nothing, so the conversion is written back into |str|.
 *
 * The first overload is given an explicit |length|; the second takes only
 * the pointer (presumably |str| must be null-terminated there -- confirm
 * against the implementation). */
void net_ToLowerCase(char* str, uint32_t length);
void net_ToLowerCase(char* str);
134
/**
 * returns pointer to first character of the range starting at |iter| that is
 * in the given set.  if not found, then |stop| is returned.  stops prematurely
 * if a null byte is encountered, and returns the address of the null byte.
 *
 * @param iter  start of the range to scan
 * @param stop  end of the range to scan (returned when nothing matches)
 * @param set   null-terminated list of characters to look for
 */
char* net_FindCharInSet(const char* iter, const char* stop, const char* set);
141
/**
 * returns pointer to first character of the range starting at |iter| that is
 * NOT in the given set.  if all characters are in the given set, then |stop|
 * is returned.  if '\0' is not included in |set|, then stops prematurely if a
 * null byte is encountered, and returns the address of the null byte.
 *
 * @param iter  start of the range to scan
 * @param stop  end of the range to scan (returned when every character is in
 *              |set|)
 * @param set   null-terminated list of characters to skip over
 */
char* net_FindCharNotInSet(const char* iter, const char* stop, const char* set);
149
/**
 * returns pointer to last character of the string NOT in the given set.  if
 * all characters are in the given set, then |stop - 1| is returned.
 *
 * Note the argument order: this is a reverse search, so |stop| is the START
 * of the string and |iter| is its end (the two-argument overload passes
 * |str| and |str + strlen(str)| respectively).
 *
 * @param stop  beginning of the string (where the backwards scan ends)
 * @param iter  end of the string (where the backwards scan begins)
 * @param set   null-terminated list of characters to skip over
 */
char* net_RFindCharNotInSet(const char* stop, const char* iter,
                            const char* set);
156
157 /**
158 * Parses a content-type header and returns the content type and
159 * charset (if any). aCharset is not modified if no charset is
160 * specified in anywhere in aHeaderStr. In that case (no charset
161 * specified), aHadCharset is set to false. Otherwise, it's set to
162 * true. Note that aContentCharset can be empty even if aHadCharset
163 * is true.
164 *
165 * This parsing is suitable for HTTP request. Use net_ParseContentType
166 * for parsing this header in HTTP responses.
167 */
168 void net_ParseRequestContentType(const nsACString& aHeaderStr,
169 nsACString& aContentType,
170 nsACString& aContentCharset,
171 bool* aHadCharset);
172
173 /**
174 * Parses a content-type header and returns the content type and
175 * charset (if any). aCharset is not modified if no charset is
176 * specified in anywhere in aHeaderStr. In that case (no charset
177 * specified), aHadCharset is set to false. Otherwise, it's set to
178 * true. Note that aContentCharset can be empty even if aHadCharset
179 * is true.
180 */
181 void net_ParseContentType(const nsACString& aHeaderStr,
182 nsACString& aContentType, nsACString& aContentCharset,
183 bool* aHadCharset);
184 /**
185 * As above, but also returns the start and end indexes for the charset
186 * parameter in aHeaderStr. These are indices for the entire parameter, NOT
187 * just the value. If there is "effectively" no charset parameter (e.g. if an
188 * earlier type with one is overridden by a later type without one),
189 * *aHadCharset will be true but *aCharsetStart will be set to -1. Note that
190 * it's possible to have aContentCharset empty and *aHadCharset true when
191 * *aCharsetStart is nonnegative; this corresponds to charset="".
192 */
193 void net_ParseContentType(const nsACString& aHeaderStr,
194 nsACString& aContentType, nsACString& aContentCharset,
195 bool* aHadCharset, int32_t* aCharsetStart,
196 int32_t* aCharsetEnd);
197
198 /* inline versions */
199
200 /* remember the 64-bit platforms ;-) */
201 #define NET_MAX_ADDRESS ((char*)UINTPTR_MAX)
202
net_FindCharInSet(const char * str,const char * set)203 inline char* net_FindCharInSet(const char* str, const char* set) {
204 return net_FindCharInSet(str, NET_MAX_ADDRESS, set);
205 }
net_FindCharNotInSet(const char * str,const char * set)206 inline char* net_FindCharNotInSet(const char* str, const char* set) {
207 return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);
208 }
net_RFindCharNotInSet(const char * str,const char * set)209 inline char* net_RFindCharNotInSet(const char* str, const char* set) {
210 return net_RFindCharNotInSet(str, str + strlen(str), set);
211 }
212
213 /**
214 * This function returns true if the given hostname does not include any
215 * restricted characters. Otherwise, false is returned.
216 */
217 bool net_IsValidHostName(const nsACString& host);
218
219 /**
220 * Checks whether the IPv4 address is valid according to RFC 3986 section 3.2.2.
221 */
222 bool net_IsValidIPv4Addr(const nsACString& aAddr);
223
224 /**
225 * Checks whether the IPv6 address is valid according to RFC 3986 section 3.2.2.
226 */
227 bool net_IsValidIPv6Addr(const nsACString& aAddr);
228
229 namespace mozilla {
230 /**
231 * A class for handling form-urlencoded query strings.
232 *
233 * Manages an ordered list of name-value pairs, and allows conversion from and
234 * to the string representation.
235 *
236 * In addition, there are static functions for handling one-shot use cases.
237 */
238 class URLParams final {
239 public:
240 /**
241 * \brief Parses a query string and calls a parameter handler for each
242 * name/value pair. The parameter handler can stop processing early by
243 * returning false.
244 *
245 * \param aInput the query string to parse
246 * \param aParamHandler the parameter handler as desribed above
247 * \tparam ParamHandler a function type compatible with signature
248 * bool(nsString, nsString)
249 *
250 * \return false if the parameter handler returned false for any parameter,
251 * true otherwise
252 */
253 template <typename ParamHandler>
Parse(const nsACString & aInput,ParamHandler aParamHandler)254 static bool Parse(const nsACString& aInput, ParamHandler aParamHandler) {
255 const char* start = aInput.BeginReading();
256 const char* const end = aInput.EndReading();
257
258 while (start != end) {
259 nsAutoString decodedName;
260 nsAutoString decodedValue;
261
262 if (!ParseNextInternal(start, end, &decodedName, &decodedValue)) {
263 continue;
264 }
265
266 if (!aParamHandler(std::move(decodedName), std::move(decodedValue))) {
267 return false;
268 }
269 }
270 return true;
271 }
272
273 /**
274 * \brief Parses a query string and returns the value of a single parameter
275 * specified by name.
276 *
277 * If there are multiple parameters with the same name, the value of the first
278 * is returned.
279 *
280 * \param aInput the query string to parse
281 * \param aName the name of the parameter to extract
282 * \param[out] aValue will be assigned the parameter value, set to void if
283 * there is no match \return true iff there was a parameter with with name
284 * \paramref aName
285 */
286 static bool Extract(const nsACString& aInput, const nsAString& aName,
287 nsAString& aValue);
288
289 /**
290 * \brief Resets the state of this instance and parses a new query string.
291 *
292 * \param aInput the query string to parse
293 */
294 void ParseInput(const nsACString& aInput);
295
296 /**
297 * Serializes the current state to a query string.
298 */
299 void Serialize(nsAString& aValue) const;
300
301 void Get(const nsAString& aName, nsString& aRetval);
302
303 void GetAll(const nsAString& aName, nsTArray<nsString>& aRetval);
304
305 /**
306 * \brief Sets the value of a given parameter.
307 *
308 * If one or more parameters of the name exist, the value of the first is
309 * replaced, and all further parameters of the name are deleted. Otherwise,
310 * the behaviour is the same as \ref Append.
311 */
312 void Set(const nsAString& aName, const nsAString& aValue);
313
314 void Append(const nsAString& aName, const nsAString& aValue);
315
316 bool Has(const nsAString& aName);
317
318 /**
319 * \brief Deletes all parameters with the given name.
320 */
321 void Delete(const nsAString& aName);
322
DeleteAll()323 void DeleteAll() { mParams.Clear(); }
324
Length()325 uint32_t Length() const { return mParams.Length(); }
326
GetKeyAtIndex(uint32_t aIndex)327 const nsAString& GetKeyAtIndex(uint32_t aIndex) const {
328 MOZ_ASSERT(aIndex < mParams.Length());
329 return mParams[aIndex].mKey;
330 }
331
GetValueAtIndex(uint32_t aIndex)332 const nsAString& GetValueAtIndex(uint32_t aIndex) const {
333 MOZ_ASSERT(aIndex < mParams.Length());
334 return mParams[aIndex].mValue;
335 }
336
337 /**
338 * \brief Performs a stable sort of the parameters, maintaining the order of
339 * multiple parameters with the same name.
340 */
341 void Sort();
342
343 private:
344 static void DecodeString(const nsACString& aInput, nsAString& aOutput);
345 static void ConvertString(const nsACString& aInput, nsAString& aOutput);
346 static bool ParseNextInternal(const char*& aStart, const char* aEnd,
347 nsAString* aOutDecodedName,
348 nsAString* aOutDecodedValue);
349
350 struct Param {
351 nsString mKey;
352 nsString mValue;
353 };
354
355 nsTArray<Param> mParams;
356 };
357 } // namespace mozilla
358
359 #endif // !nsURLHelper_h__
360