1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #ifndef nsURLHelper_h__
7 #define nsURLHelper_h__
8 
9 #include "nsString.h"
10 #include "nsTArray.h"
11 #include "nsASCIIMask.h"
12 
13 class nsIFile;
14 class nsIURLParser;
15 
/**
 * Options accepted by net_CoalesceDirs.  The non-zero enumerators occupy
 * distinct bits (1 << 0 and 1 << 1).
 */
enum netCoalesceFlags {
  /** no special handling requested */
  NET_COALESCE_NORMAL = 0,

  /**
   * retains /../ that reach above dir root (useful for FTP
   * servers in which the root of the FTP URL is not necessarily
   * the root of the FTP filesystem).
   */
  NET_COALESCE_ALLOW_RELATIVE_ROOT = 1 << 0,

  /**
   * recognizes /%2F and // as markers for the root directory
   * and handles them properly.
   */
  NET_COALESCE_DOUBLE_SLASH_IS_ROOT = 1 << 1
};
32 
33 //----------------------------------------------------------------------------
34 // This module contains some private helper functions related to URL parsing.
35 //----------------------------------------------------------------------------
36 
/**
 * Shutdown hook for this module; frees the URL parser.
 */
void net_ShutdownURLHelper();
#ifdef XP_MACOSX
/* Additional shutdown entry point, only declared when XP_MACOSX is defined. */
void net_ShutdownURLHelperOSX();
#endif
42 
43 /* access URL parsers */
44 nsIURLParser* net_GetAuthURLParser();
45 nsIURLParser* net_GetNoAuthURLParser();
46 nsIURLParser* net_GetStdURLParser();
47 
48 /* convert between nsIFile and file:// URL spec
49  * net_GetURLSpecFromFile does an extra stat, so callers should
50  * avoid it if possible in favor of net_GetURLSpecFromActualFile
51  * and net_GetURLSpecFromDir */
52 nsresult net_GetURLSpecFromFile(nsIFile*, nsACString&);
53 nsresult net_GetURLSpecFromDir(nsIFile*, nsACString&);
54 nsresult net_GetURLSpecFromActualFile(nsIFile*, nsACString&);
55 nsresult net_GetFileFromURLSpec(const nsACString&, nsIFile**);
56 
57 /* extract file path components from file:// URL */
58 nsresult net_ParseFileURL(const nsACString& inURL, nsACString& outDirectory,
59                           nsACString& outFileBaseName,
60                           nsACString& outFileExtension);
61 
62 /* handle .. in dirs while resolving URLs (path is UTF-8) */
63 void net_CoalesceDirs(netCoalesceFlags flags, char* path);
64 
65 /**
66  * Check if a URL is absolute
67  *
68  * @param inURL     URL spec
69  * @return true if the given spec represents an absolute URL
70  */
71 bool net_IsAbsoluteURL(const nsACString& uri);
72 
73 /**
74  * Extract URI-Scheme if possible
75  *
76  * @param inURI     URI spec
77  * @param scheme    scheme copied to this buffer on return. Is lowercase.
78  */
79 nsresult net_ExtractURLScheme(const nsACString& inURI, nsACString& scheme);
80 
81 /* check that the given scheme conforms to RFC 2396 */
82 bool net_IsValidScheme(const nsACString& scheme);
83 
84 /**
85  * This function strips out all C0 controls and space at the beginning and end
86  * of the URL and filters out \r, \n, \t from the middle of the URL.  This makes
87  * it safe to call on things like javascript: urls or data: urls, where we may
88  * in fact run into whitespace that is not properly encoded.
89  *
90  * @param input the URL spec we want to filter
91  * @param result the out param to write to if filtering happens
92  */
93 void net_FilterURIString(const nsACString& input, nsACString& result);
94 
95 /**
96  * This function performs character stripping just like net_FilterURIString,
97  * with the added benefit of also performing percent escaping of dissallowed
98  * characters, all in one pass. Saving one pass is very important when operating
99  * on really large strings.
100  *
101  * @param aInput the URL spec we want to filter
102  * @param aFlags the flags which control which characters we escape
103  * @param aFilterMask a mask of characters that should excluded from the result
104  * @param aResult the out param to write to if filtering happens
105  */
106 nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags,
107                                 const ASCIIMaskArray& aFilterMask,
108                                 nsACString& aResult);
109 
#if defined(XP_WIN)
/**
 * On Win32 and OS/2 systems a back-slash in a file:// URL is equivalent to a
 * forward-slash.  This function maps any back-slashes to forward-slashes.
 * It is only declared when XP_WIN is defined.
 *
 * @param aURL
 *        The URL string to normalize (UTF-8 encoded).  This can be a
 *        relative URL segment.
 * @param aResultBuf
 *        The resulting string is appended to this string.  If the input URL
 *        is already normalized, then aResultBuf is unchanged.
 *
 * @returns false if aURL is already normalized.  Otherwise, returns true.
 */
bool net_NormalizeFileURL(const nsACString& aURL, nsCString& aResultBuf);
#endif
126 
127 /*****************************************************************************
128  * generic string routines follow (XXX move to someplace more generic).
129  */
130 
/**
 * Convert to lower case.  Both overloads take a mutable buffer.
 *
 * The first overload is given an explicit length; the second takes only the
 * pointer (NOTE(review): presumably |str| must be null-terminated there --
 * confirm against the implementation).
 */
void net_ToLowerCase(char* str, uint32_t length);
void net_ToLowerCase(char* str);
134 
/**
 * Returns a pointer to the first character, starting at |iter|, that is in
 * the given set.  If not found, then |stop| is returned.  Stops prematurely
 * if a null byte is encountered, and returns the address of the null byte.
 *
 * @param iter  where the search starts
 * @param stop  end of the range to search
 * @param set   null-terminated list of the characters to look for
 */
char* net_FindCharInSet(const char* iter, const char* stop, const char* set);
141 
/**
 * Returns a pointer to the first character, starting at |iter|, that is NOT
 * in the given set.  If all characters are in the given set, then |stop| is
 * returned.  If '\0' is not included in |set|, then stops prematurely if a
 * null byte is encountered, and returns the address of the null byte.
 *
 * @param iter  where the search starts
 * @param stop  end of the range to search
 * @param set   the characters to skip over
 */
char* net_FindCharNotInSet(const char* iter, const char* stop, const char* set);
149 
/**
 * Returns a pointer to the last character NOT in the given set, searching
 * backward from |iter| (the end of the range) toward |stop| (the first
 * character of the range).  If all characters are in the given set, then
 * |stop - 1| is returned.
 *
 * Note the parameter order: the beginning of the range comes first, as
 * |stop|.  The two-argument overload calls this as
 * (str, str + strlen(str), set).
 */
char* net_RFindCharNotInSet(const char* stop, const char* iter,
                            const char* set);
156 
157 /**
158  * Parses a content-type header and returns the content type and
159  * charset (if any).  aCharset is not modified if no charset is
160  * specified in anywhere in aHeaderStr.  In that case (no charset
161  * specified), aHadCharset is set to false.  Otherwise, it's set to
162  * true.  Note that aContentCharset can be empty even if aHadCharset
163  * is true.
164  *
165  * This parsing is suitable for HTTP request.  Use net_ParseContentType
166  * for parsing this header in HTTP responses.
167  */
168 void net_ParseRequestContentType(const nsACString& aHeaderStr,
169                                  nsACString& aContentType,
170                                  nsACString& aContentCharset,
171                                  bool* aHadCharset);
172 
173 /**
174  * Parses a content-type header and returns the content type and
175  * charset (if any).  aCharset is not modified if no charset is
176  * specified in anywhere in aHeaderStr.  In that case (no charset
177  * specified), aHadCharset is set to false.  Otherwise, it's set to
178  * true.  Note that aContentCharset can be empty even if aHadCharset
179  * is true.
180  */
181 void net_ParseContentType(const nsACString& aHeaderStr,
182                           nsACString& aContentType, nsACString& aContentCharset,
183                           bool* aHadCharset);
184 /**
185  * As above, but also returns the start and end indexes for the charset
186  * parameter in aHeaderStr.  These are indices for the entire parameter, NOT
187  * just the value.  If there is "effectively" no charset parameter (e.g. if an
188  * earlier type with one is overridden by a later type without one),
189  * *aHadCharset will be true but *aCharsetStart will be set to -1.  Note that
190  * it's possible to have aContentCharset empty and *aHadCharset true when
191  * *aCharsetStart is nonnegative; this corresponds to charset="".
192  */
193 void net_ParseContentType(const nsACString& aHeaderStr,
194                           nsACString& aContentType, nsACString& aContentCharset,
195                           bool* aHadCharset, int32_t* aCharsetStart,
196                           int32_t* aCharsetEnd);
197 
198 /* inline versions */
199 
/*
 * Largest representable address, used by the two-argument inline overloads
 * as an "unbounded" stop pointer.  Built from UINTPTR_MAX so that it is
 * correct on 64-bit platforms as well.
 */
#define NET_MAX_ADDRESS (reinterpret_cast<char*>(UINTPTR_MAX))
202 
net_FindCharInSet(const char * str,const char * set)203 inline char* net_FindCharInSet(const char* str, const char* set) {
204   return net_FindCharInSet(str, NET_MAX_ADDRESS, set);
205 }
net_FindCharNotInSet(const char * str,const char * set)206 inline char* net_FindCharNotInSet(const char* str, const char* set) {
207   return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);
208 }
net_RFindCharNotInSet(const char * str,const char * set)209 inline char* net_RFindCharNotInSet(const char* str, const char* set) {
210   return net_RFindCharNotInSet(str, str + strlen(str), set);
211 }
212 
213 /**
214  * This function returns true if the given hostname does not include any
215  * restricted characters.  Otherwise, false is returned.
216  */
217 bool net_IsValidHostName(const nsACString& host);
218 
219 /**
220  * Checks whether the IPv4 address is valid according to RFC 3986 section 3.2.2.
221  */
222 bool net_IsValidIPv4Addr(const nsACString& aAddr);
223 
224 /**
225  * Checks whether the IPv6 address is valid according to RFC 3986 section 3.2.2.
226  */
227 bool net_IsValidIPv6Addr(const nsACString& aAddr);
228 
229 namespace mozilla {
230 /**
231  * A class for handling form-urlencoded query strings.
232  *
233  * Manages an ordered list of name-value pairs, and allows conversion from and
234  * to the string representation.
235  *
236  * In addition, there are static functions for handling one-shot use cases.
237  */
238 class URLParams final {
239  public:
240   /**
241    * \brief Parses a query string and calls a parameter handler for each
242    * name/value pair. The parameter handler can stop processing early by
243    * returning false.
244    *
245    * \param aInput the query string to parse
246    * \param aParamHandler the parameter handler as desribed above
247    * \tparam ParamHandler a function type compatible with signature
248    * bool(nsString, nsString)
249    *
250    * \return false if the parameter handler returned false for any parameter,
251    * true otherwise
252    */
253   template <typename ParamHandler>
Parse(const nsACString & aInput,ParamHandler aParamHandler)254   static bool Parse(const nsACString& aInput, ParamHandler aParamHandler) {
255     const char* start = aInput.BeginReading();
256     const char* const end = aInput.EndReading();
257 
258     while (start != end) {
259       nsAutoString decodedName;
260       nsAutoString decodedValue;
261 
262       if (!ParseNextInternal(start, end, &decodedName, &decodedValue)) {
263         continue;
264       }
265 
266       if (!aParamHandler(std::move(decodedName), std::move(decodedValue))) {
267         return false;
268       }
269     }
270     return true;
271   }
272 
273   /**
274    * \brief Parses a query string and returns the value of a single parameter
275    * specified by name.
276    *
277    * If there are multiple parameters with the same name, the value of the first
278    * is returned.
279    *
280    * \param aInput the query string to parse
281    * \param aName the name of the parameter to extract
282    * \param[out] aValue will be assigned the parameter value, set to void if
283    * there is no match \return true iff there was a parameter with with name
284    * \paramref aName
285    */
286   static bool Extract(const nsACString& aInput, const nsAString& aName,
287                       nsAString& aValue);
288 
289   /**
290    * \brief Resets the state of this instance and parses a new query string.
291    *
292    * \param aInput the query string to parse
293    */
294   void ParseInput(const nsACString& aInput);
295 
296   /**
297    * Serializes the current state to a query string.
298    */
299   void Serialize(nsAString& aValue) const;
300 
301   void Get(const nsAString& aName, nsString& aRetval);
302 
303   void GetAll(const nsAString& aName, nsTArray<nsString>& aRetval);
304 
305   /**
306    * \brief Sets the value of a given parameter.
307    *
308    * If one or more parameters of the name exist, the value of the first is
309    * replaced, and all further parameters of the name are deleted. Otherwise,
310    * the behaviour is the same as \ref Append.
311    */
312   void Set(const nsAString& aName, const nsAString& aValue);
313 
314   void Append(const nsAString& aName, const nsAString& aValue);
315 
316   bool Has(const nsAString& aName);
317 
318   /**
319    * \brief Deletes all parameters with the given name.
320    */
321   void Delete(const nsAString& aName);
322 
DeleteAll()323   void DeleteAll() { mParams.Clear(); }
324 
Length()325   uint32_t Length() const { return mParams.Length(); }
326 
GetKeyAtIndex(uint32_t aIndex)327   const nsAString& GetKeyAtIndex(uint32_t aIndex) const {
328     MOZ_ASSERT(aIndex < mParams.Length());
329     return mParams[aIndex].mKey;
330   }
331 
GetValueAtIndex(uint32_t aIndex)332   const nsAString& GetValueAtIndex(uint32_t aIndex) const {
333     MOZ_ASSERT(aIndex < mParams.Length());
334     return mParams[aIndex].mValue;
335   }
336 
337   /**
338    * \brief Performs a stable sort of the parameters, maintaining the order of
339    * multiple parameters with the same name.
340    */
341   void Sort();
342 
343  private:
344   static void DecodeString(const nsACString& aInput, nsAString& aOutput);
345   static void ConvertString(const nsACString& aInput, nsAString& aOutput);
346   static bool ParseNextInternal(const char*& aStart, const char* aEnd,
347                                 nsAString* aOutDecodedName,
348                                 nsAString* aOutDecodedValue);
349 
350   struct Param {
351     nsString mKey;
352     nsString mValue;
353   };
354 
355   nsTArray<Param> mParams;
356 };
357 }  // namespace mozilla
358 
359 #endif  // !nsURLHelper_h__
360