1 /*
2  * Copyright 2011 Michael Drake <tlsa@netsurf-browser.org>
3  *
4  * This file is part of NetSurf, http://www.netsurf-browser.org/
5  *
6  * NetSurf is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * NetSurf is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /** \file
20  * NetSurf URL handling (interface).
21  */
22 
23 #ifndef _NETSURF_UTILS_NSURL_H_
24 #define _NETSURF_UTILS_NSURL_H_
25 
26 #include <libwapcaplet/libwapcaplet.h>
27 #include "utils/errors.h"
28 
29 
30 /** NetSurf URL object (opaque: its layout is not defined in this header) */
31 typedef struct nsurl nsurl;
32 
33 /** A type for URL schemes, as returned by nsurl_get_scheme_type() */
34 enum nsurl_scheme_type {
35 	NSURL_SCHEME_OTHER,	/* presumably any scheme not listed below; confirm */
36 	NSURL_SCHEME_HTTP,
37 	NSURL_SCHEME_HTTPS,
38 	NSURL_SCHEME_FILE,
39 	NSURL_SCHEME_FTP,
40 	NSURL_SCHEME_MAILTO,
41 	NSURL_SCHEME_DATA
42 };
43 
/**
 * URL components, expressed as bit flags
 *
 * Each basic component has its own bit, so values can be combined with
 * bitwise OR.  The composite enumerators are such combinations.
 */
44 typedef enum nsurl_component {
45 	NSURL_SCHEME		= (1 << 0),
46 	NSURL_USERNAME		= (1 << 1),
47 	NSURL_PASSWORD		= (1 << 2),
	/** Username and password */
48 	NSURL_CREDENTIALS	= NSURL_USERNAME | NSURL_PASSWORD,
49 	NSURL_HOST		= (1 << 3),
50 	NSURL_PORT		= (1 << 4),
	/** Credentials, host and port */
51 	NSURL_AUTHORITY		= NSURL_CREDENTIALS | NSURL_HOST | NSURL_PORT,
52 	NSURL_PATH		= (1 << 5),
53 	NSURL_QUERY		= (1 << 6),
	/** Every component except the fragment */
54 	NSURL_COMPLETE		= NSURL_SCHEME | NSURL_AUTHORITY |
55 				  NSURL_PATH | NSURL_QUERY,
56 	NSURL_FRAGMENT		= (1 << 7),
	/** Every component, including the fragment */
57 	NSURL_WITH_FRAGMENT	= NSURL_COMPLETE | NSURL_FRAGMENT
58 } nsurl_component;
59 
60 
61 /**
62  * Create a NetSurf URL object from a URL string
63  *
64  * \param url_s	  String to create NetSurf URL from
65  * \param url	  Returns the created NetSurf URL
66  * \return NSERROR_OK on success, appropriate error otherwise
67  *
68  * If return value != NSERROR_OK, nothing will be returned in url.
69  *
70  * On success the client holds a reference to the created object, and it is
71  * up to them to call nsurl_unref when they are finished with it.
72  */
73 nserror nsurl_create(const char * const url_s, nsurl **url);
74 
75 
76 /**
77  * Increment the reference count of a NetSurf URL object
78  *
79  * \param url	  NetSurf URL to create another reference to
80  * \return The NetSurf URL pointer to use as the copy
81  *
82  * Use this when copying a NetSurf URL into a persistent data structure.
 * Drop the extra reference with nsurl_unref when the copy is no longer needed.
83  */
84 nsurl *nsurl_ref(nsurl *url);
85 
86 
87 /**
88  * Drop a reference to a NetSurf URL object
89  *
90  * \param url	  NetSurf URL to drop reference to
91  *
92  * When the reference count reaches zero the NetSurf URL is destroyed, so
 * the caller should not use url again after dropping its reference.
93  */
94 void nsurl_unref(nsurl *url);
95 
96 
97 /**
98  * Compare two URLs
99  *
100  * \param url1	  First NetSurf URL
101  * \param url2	  Second NetSurf URL
102  * \param parts	  The URL components to be compared (nsurl_component flags)
103  * \return true on match else false
104  */
106 bool nsurl_compare(const nsurl *url1, const nsurl *url2, nsurl_component parts);
107 
108 
109 /**
110  * Get URL (section) as a string, from a NetSurf URL object
111  *
112  * \param url	  NetSurf URL
113  * \param parts	  The required URL components (nsurl_component flags)
114  * \param url_s	  Returns a url string
115  * \param url_l	  Returns length of url_s
116  * \return NSERROR_OK on success, appropriate error otherwise
117  *
118  * If return value != NSERROR_OK, nothing will be returned in url_s or url_l.
119  *
120  * The string returned in url_s is owned by the client and it is up to them
121  * to free it.  It includes a trailing '\0'.
122  *
123  * The length returned in url_l excludes the trailing '\0'.
124  *
125  * It is not enforced that the required URL components are consecutive,
126  * although non-consecutive URL components generally make no sense.  The
127  * exception is removal of credentials from a URL, such as for display in a
128  * browser window URL bar.  'NSURL_COMPLETE & ~NSURL_PASSWORD' would remove
129  * the password from a complete URL.
130  */
131 nserror nsurl_get(const nsurl *url, nsurl_component parts,
132 		char **url_s, size_t *url_l);
133 
134 
135 /**
136  * Get part of a URL as a lwc_string, from a NetSurf URL object
137  *
138  * \param url	  NetSurf URL object
139  * \param part	  The single URL component required
140  * \return the required component as an lwc_string, or NULL
141  *
142  * The caller owns the returned lwc_string and should call lwc_string_unref
143  * when they are done with it.
144  *
145  * Only single components are valid values for the part parameter:
146  *    NSURL_SCHEME
147  *    NSURL_USERNAME
148  *    NSURL_PASSWORD
149  *    NSURL_HOST
150  *    NSURL_PORT
151  *    NSURL_PATH
152  *    NSURL_QUERY
153  *    NSURL_FRAGMENT
154  */
155 lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part);
156 
157 
158 /**
159  * Get the scheme type from a NetSurf URL object
160  *
161  * \param url	  NetSurf URL object
162  * \return The URL scheme type, as an nsurl_scheme_type value.
163  */
164 enum nsurl_scheme_type nsurl_get_scheme_type(const nsurl *url);
165 
166 
167 /**
168  * Enquire about the existence of components in a given URL
169  *
170  * \param url	  NetSurf URL object
171  * \param part	  The URL component to confirm the existence of
172  * \return true iff the component in question exists in url
173  *
174  * The valid values for the part parameter are:
175  *    NSURL_SCHEME
176  *    NSURL_USERNAME
177  *    NSURL_PASSWORD
178  *    NSURL_CREDENTIALS
179  *    NSURL_HOST
180  *    NSURL_PORT
181  *    NSURL_PATH
182  *    NSURL_QUERY
183  *    NSURL_FRAGMENT
184  */
185 bool nsurl_has_component(const nsurl *url, nsurl_component part);
186 
187 
188 /**
189  * Access a NetSurf URL object as a string
190  *
191  * \param url	  NetSurf URL to retrieve a string pointer for.
192  * \return the required string
193  *
194  * The returned string is owned by the NetSurf URL object and is destroyed
195  * with it.  Keep a reference to the URL for as long as the string is needed.
196  *
197  * The returned string has a trailing '\0'.
198  */
199 const char *nsurl_access(const nsurl *url);
200 
201 
202 /**
203  * Variant of \ref nsurl_access for logging.
204  *
205  * \param url	  NetSurf URL to retrieve a string pointer for.
206  * \return the required string
207  *
208  * This will not necessarily return the actual nsurl's URL, but something
209  * that is suitable for recording to logs.  E.g. URLs with the `data` scheme
210  * will return a simple placeholder, to avoid repeatedly dumping loads of data.
211  *
212  * The returned string is owned by the NetSurf URL object and is destroyed
213  * with it.  Keep a reference to the URL for as long as the string is needed.
214  *
215  * The returned string has a trailing '\0'.
216  */
217 const char *nsurl_access_log(const nsurl *url);
218 
219 
220 /**
221  * Get a UTF-8 string (for human readable IDNs) from a NetSurf URL object
222  *
223  * \param url	  NetSurf URL object
224  * \param url_s	  Returns a url string
225  * \param url_l	  Returns length of url_s
226  * \return NSERROR_OK on success, appropriate error otherwise
227  *
228  * If return value != NSERROR_OK, nothing will be returned in url_s or url_l.
229  *
230  * The string returned in url_s is owned by the client, who is responsible
231  * for freeing it.  It includes a trailing '\0'.
232  *
233  * The length returned in url_l excludes the trailing '\0'.
234  */
235 nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l);
236 
237 
238 /**
239  * Access a URL's path leaf as a string
240  *
241  * \param url	  NetSurf URL to retrieve a string pointer for.
242  * \return the required string
243  *
244  * The returned string is owned by the NetSurf URL object and is destroyed
245  * with it.  Keep a reference to the URL for as long as the string is needed.
246  *
247  * The returned string has a trailing '\0'.
248  */
249 const char *nsurl_access_leaf(const nsurl *url);
250 
251 
252 /**
253  * Find the length of a NetSurf URL object's URL, as returned by nsurl_access
254  *
255  * \param url	  NetSurf URL to find length of.
256  * \return the length of the URL string
257  *
258  * The returned length excludes the trailing '\0'.
259  */
260 size_t nsurl_length(const nsurl *url);
261 
262 
263 /**
264  * Get a URL's hash value
265  *
266  * \param url	  NetSurf URL to get the hash value for.
267  * \return the hash value
268  */
269 uint32_t nsurl_hash(const nsurl *url);
270 
271 
272 /**
273  * Join a base url to a relative link part, creating a new NetSurf URL object
274  *
275  * \param base	  NetSurf URL containing the base to join rel to
276  * \param rel	  String containing the relative link part
277  * \param joined  Returns joined NetSurf URL
278  * \return NSERROR_OK on success, appropriate error otherwise
279  *
280  * If return value != NSERROR_OK, nothing will be returned in joined.
281  *
282  * It is up to the client to call nsurl_unref when they are finished with
283  * the created object.
284  */
285 nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined);
286 
287 
288 /**
289  * Create a NetSurf URL object without a fragment from a NetSurf URL
290  *
291  * \param url	  NetSurf URL to create new NetSurf URL from
292  * \param no_frag Returns new NetSurf URL without fragment
293  * \return NSERROR_OK on success, appropriate error otherwise
294  *
295  * If return value != NSERROR_OK, nothing will be returned in no_frag.
296  *
297  * On success it is up to the client to call nsurl_unref on no_frag when
298  * they are finished with the created object.
299  */
300 nserror nsurl_defragment(const nsurl *url, nsurl **no_frag);
301 
302 
303 /**
304  * Create a NetSurf URL object, adding a fragment to an existing URL object
305  *
306  * \param url	  NetSurf URL to create new NetSurf URL from
307  * \param frag	  Fragment to add
308  * \param new_url Returns new NetSurf URL with fragment frag
309  * \return NSERROR_OK on success, appropriate error otherwise
310  *
311  * If return value != NSERROR_OK, nothing will be returned in new_url.
312  *
313  * It is up to the client to call nsurl_unref when they are finished with
314  * the created object.
315  *
316  * Any fragment in url is replaced with frag in new_url.
317  */
318 nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url);
319 
320 
321 /**
322  * Create a NetSurf URL object, with query string replaced
323  *
324  * \param url	  NetSurf URL to create new NetSurf URL from
325  * \param query	  Query string to use
326  * \param new_url Returns new NetSurf URL with query string provided
327  * \return NSERROR_OK on success, appropriate error otherwise
328  *
329  * If return value != NSERROR_OK, nothing will be returned in new_url.
330  *
331  * On success it is up to the client to call nsurl_unref on new_url when
332  * they are finished with the created object.
333  *
334  * Any query component in url is replaced with query in new_url.
335  *
336  * Passing the empty string as a replacement will result in the query
337  * component being removed.
338  */
339 nserror nsurl_replace_query(const nsurl *url, const char *query,
340 		nsurl **new_url);
341 
342 
343 /**
344  * Create a NetSurf URL object, with scheme replaced
345  *
346  * \param url	  NetSurf URL to create new NetSurf URL from
347  * \param scheme  Scheme to use
348  * \param new_url Returns new NetSurf URL with scheme provided
349  * \return NSERROR_OK on success, appropriate error otherwise
350  *
351  * If return value != NSERROR_OK, nothing will be returned in new_url.
352  *
353  * On success it is up to the client to call nsurl_unref on new_url when
354  * they are finished with the created object.
355  *
356  * Any scheme component in url is replaced with scheme in new_url.
357  */
358 nserror nsurl_replace_scheme(const nsurl *url, lwc_string *scheme,
359 		nsurl **new_url);
360 
361 
362 /**
363  * Attempt to find a nice filename for a URL.
364  *
365  * \param url		A NetSurf URL object to create a filename from
366  * \param result	Updated to caller-owned string with filename
367  * \param remove_extensions  Whether to remove any extensions from the filename
368  * \return NSERROR_OK on success, appropriate error otherwise
369  *
370  * Caller must ensure the result string is freed, if NSERROR_OK is returned.
371  */
372 nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions);
373 
374 
375 /**
376  * Create a NetSurf URL object for the parent location of an existing URL.
377  *
378  * \param url	  NetSurf URL to create new NetSurf URL from
379  * \param new_url Returns new NetSurf URL with parent URL path
380  * \return NSERROR_OK on success, appropriate error otherwise
381  *
382  * If return value != NSERROR_OK, nothing will be returned in new_url.
383  *
384  * It is up to the client to call nsurl_unref when they are finished with
385  * the created object.
386  *
387  * As well as stripping the topmost path segment, any query and fragment
 * are stripped.
388  */
389 nserror nsurl_parent(const nsurl *url, nsurl **new_url);
390 
391 /**
392  * Dump a NetSurf URL's internal components to stderr
393  *
394  * This is helper functionality for developers, and should not generally
395  * be called.
396  *
397  * \param url	The NetSurf URL to dump components of
398  */
399 void nsurl_dump(const nsurl *url);
400 
401 #endif
402