1 /* 2 * Copyright 2011 Michael Drake <tlsa@netsurf-browser.org> 3 * 4 * This file is part of NetSurf, http://www.netsurf-browser.org/ 5 * 6 * NetSurf is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; version 2 of the License. 9 * 10 * NetSurf is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 /** \file 20 * NetSurf URL handling (interface). 21 */ 22 23 #ifndef _NETSURF_UTILS_NSURL_H_ 24 #define _NETSURF_UTILS_NSURL_H_ 25 26 #include <libwapcaplet/libwapcaplet.h> 27 #include "utils/errors.h" 28 29 30 /** NetSurf URL object */ 31 typedef struct nsurl nsurl; 32 33 /** A type for URL schemes */ 34 enum nsurl_scheme_type { 35 NSURL_SCHEME_OTHER, 36 NSURL_SCHEME_HTTP, 37 NSURL_SCHEME_HTTPS, 38 NSURL_SCHEME_FILE, 39 NSURL_SCHEME_FTP, 40 NSURL_SCHEME_MAILTO, 41 NSURL_SCHEME_DATA 42 }; 43 44 typedef enum nsurl_component { 45 NSURL_SCHEME = (1 << 0), 46 NSURL_USERNAME = (1 << 1), 47 NSURL_PASSWORD = (1 << 2), 48 NSURL_CREDENTIALS = NSURL_USERNAME | NSURL_PASSWORD, 49 NSURL_HOST = (1 << 3), 50 NSURL_PORT = (1 << 4), 51 NSURL_AUTHORITY = NSURL_CREDENTIALS | NSURL_HOST | NSURL_PORT, 52 NSURL_PATH = (1 << 5), 53 NSURL_QUERY = (1 << 6), 54 NSURL_COMPLETE = NSURL_SCHEME | NSURL_AUTHORITY | 55 NSURL_PATH | NSURL_QUERY, 56 NSURL_FRAGMENT = (1 << 7), 57 NSURL_WITH_FRAGMENT = NSURL_COMPLETE | NSURL_FRAGMENT 58 } nsurl_component; 59 60 61 /** 62 * Create a NetSurf URL object from a URL string 63 * 64 * \param url_s String to create NetSurf URL from 65 * \param url Returns a NetSurf URL 66 * \return NSERROR_OK on success, appropriate error otherwise 67 * 68 * If return value != NSERROR_OK, nothing will be returned in url. 69 * 70 * It is up to the client to call nsurl_unref when they are finished with 71 * the created object. 72 */ 73 nserror nsurl_create(const char * const url_s, nsurl **url); 74 75 76 /** 77 * Increment the reference count to a NetSurf URL object 78 * 79 * \param url NetSurf URL to create another reference to 80 * \return The NetSurf URL pointer to use as the copy 81 * 82 * Use this when copying a NetSurf URL into a persistent data structure. 83 */ 84 nsurl *nsurl_ref(nsurl *url); 85 86 87 /** 88 * Drop a reference to a NetSurf URL object 89 * 90 * \param url NetSurf URL to drop reference to 91 * 92 * When the reference count reaches zero then the NetSurf URL will be destroyed 93 */ 94 void nsurl_unref(nsurl *url); 95 96 97 /** 98 * Compare two URLs 99 * 100 * \param url1 First NetSurf URL 101 * \param url2 Second NetSurf URL 102 * \param parts The URL components to be compared 103 * \return true on match else false 104 * 105 */ 106 bool nsurl_compare(const nsurl *url1, const nsurl *url2, nsurl_component parts); 107 108 109 /** 110 * Get URL (section) as a string, from a NetSurf URL object 111 * 112 * \param url NetSurf URL 113 * \param parts The required URL components. 114 * \param url_s Returns a url string 115 * \param url_l Returns length of url_s 116 * \return NSERROR_OK on success, appropriate error otherwise 117 * 118 * If return value != NSERROR_OK, nothing will be returned in url_s or url_l. 119 * 120 * The string returned in url_s is owned by the client and it is up to them 121 * to free it. It includes a trailing '\0'. 122 * 123 * The length returned in url_l excludes the trailing '\0'. 124 * 125 * That the required URL components be consecutive is not enforced, however, 126 * non-consecutive URL components generally make no sense. The exception 127 * is removal of credentials from a URL, such as for display in browser 128 * window URL bar. 'NSURL_COMPLETE &~ NSURL_PASSWORD' would remove the 129 * password from a complete URL. 130 */ 131 nserror nsurl_get(const nsurl *url, nsurl_component parts, 132 char **url_s, size_t *url_l); 133 134 135 /** 136 * Get part of a URL as a lwc_string, from a NetSurf URL object 137 * 138 * \param url NetSurf URL object 139 * \param part The URL component required 140 * \return the required component as an lwc_string, or NULL 141 * 142 * The caller owns the returned lwc_string and should call lwc_string_unref 143 * when they are done with it. 144 * 145 * The valid values for the part parameter are: 146 * NSURL_SCHEME 147 * NSURL_USERNAME 148 * NSURL_PASSWORD 149 * NSURL_HOST 150 * NSURL_PORT 151 * NSURL_PATH 152 * NSURL_QUERY 153 * NSURL_FRAGMENT 154 */ 155 lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part); 156 157 158 /** 159 * Get the scheme type from a NetSurf URL object 160 * 161 * \param url NetSurf URL object 162 * \return The URL scheme type. 163 */ 164 enum nsurl_scheme_type nsurl_get_scheme_type(const nsurl *url); 165 166 167 /** 168 * Enquire about the existence of componenets in a given URL 169 * 170 * \param url NetSurf URL object 171 * \param part The URL components confirm existence of 172 * \return true iff the component in question exists in url 173 * 174 * The valid values for the part parameter are: 175 * NSURL_SCHEME 176 * NSURL_USERNAME 177 * NSURL_PASSWORD 178 * NSURL_CREDENTIALS 179 * NSURL_HOST 180 * NSURL_PORT 181 * NSURL_PATH 182 * NSURL_QUERY 183 * NSURL_FRAGMENT 184 */ 185 bool nsurl_has_component(const nsurl *url, nsurl_component part); 186 187 188 /** 189 * Access a NetSurf URL object as a string 190 * 191 * \param url NetSurf URL to retrieve a string pointer for. 192 * \return the required string 193 * 194 * The returned string is owned by the NetSurf URL object. It will die 195 * with the NetSurf URL object. Keep a reference to the URL if you need it. 196 * 197 * The returned string has a trailing '\0'. 198 */ 199 const char *nsurl_access(const nsurl *url); 200 201 202 /** 203 * Variant of \ref nsurl_access for logging. 204 * 205 * \param url NetSurf URL to retrieve a string pointer for. 206 * \return the required string 207 * 208 * This will not necessarily return the actual nsurl's URL, but something 209 * that is suitable for recording to logs. E.g. URLs with the `data` scheme 210 * will return a simple place holder, to avoid repeatedly dumping loads of data. 211 * 212 * The returned string is owned by the NetSurf URL object. It will die 213 * with the NetSurf URL object. Keep a reference to the URL if you need it. 214 * 215 * The returned string has a trailing '\0'. 216 */ 217 const char *nsurl_access_log(const nsurl *url); 218 219 220 /** 221 * Get a UTF-8 string (for human readable IDNs) from a NetSurf URL object 222 * 223 * \param url NetSurf URL object 224 * \param url_s Returns a url string 225 * \param url_l Returns length of url_s 226 * \return NSERROR_OK on success, appropriate error otherwise 227 * 228 * If return value != NSERROR_OK, nothing will be returned in url_s or url_l. 229 * 230 * The string returned in url_s is owned by the client and it is up to them 231 * to free it. It includes a trailing '\0'. 232 * 233 * The length returned in url_l excludes the trailing '\0'. 234 */ 235 nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l); 236 237 238 /** 239 * Access a URL's path leaf as a string 240 * 241 * \param url NetSurf URL to retrieve a string pointer for. 242 * \return the required string 243 * 244 * The returned string is owned by the NetSurf URL object. It will die 245 * with the NetSurf URL object. Keep a reference to the URL if you need it. 246 * 247 * The returned string has a trailing '\0'. 248 */ 249 const char *nsurl_access_leaf(const nsurl *url); 250 251 252 /** 253 * Find the length of a NetSurf URL object's URL, as returned by nsurl_access 254 * 255 * \param url NetSurf URL to find length of. 256 * \return the required string 257 * 258 * The returned length excludes the trailing '\0'. 259 */ 260 size_t nsurl_length(const nsurl *url); 261 262 263 /** 264 * Get a URL's hash value 265 * 266 * \param url NetSurf URL get hash value for. 267 * \return the hash value 268 */ 269 uint32_t nsurl_hash(const nsurl *url); 270 271 272 /** 273 * Join a base url to a relative link part, creating a new NetSurf URL object 274 * 275 * \param base NetSurf URL containing the base to join rel to 276 * \param rel String containing the relative link part 277 * \param joined Returns joined NetSurf URL 278 * \return NSERROR_OK on success, appropriate error otherwise 279 * 280 * If return value != NSERROR_OK, nothing will be returned in join. 281 * 282 * It is up to the client to call nsurl_unref when they are finished with 283 * the created object. 284 */ 285 nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined); 286 287 288 /** 289 * Create a NetSurf URL object without a fragment from a NetSurf URL 290 * 291 * \param url NetSurf URL to create new NetSurf URL from 292 * \param no_frag Returns new NetSurf URL without fragment 293 * \return NSERROR_OK on success, appropriate error otherwise 294 * 295 * If return value != NSERROR_OK, nothing will be returned in no_frag. 296 * 297 * It is up to the client to call nsurl_unref when they are finished with 298 * the created object. 299 */ 300 nserror nsurl_defragment(const nsurl *url, nsurl **no_frag); 301 302 303 /** 304 * Create a NetSurf URL object, adding a fragment to an existing URL object 305 * 306 * \param url NetSurf URL to create new NetSurf URL from 307 * \param frag Fragment to add 308 * \param new_url Returns new NetSurf URL without fragment 309 * \return NSERROR_OK on success, appropriate error otherwise 310 * 311 * If return value != NSERROR_OK, nothing will be returned in new_url. 312 * 313 * It is up to the client to call nsurl_unref when they are finished with 314 * the created object. 315 * 316 * Any fragment in url is replaced with frag in new_url. 317 */ 318 nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url); 319 320 321 /** 322 * Create a NetSurf URL object, with query string replaced 323 * 324 * \param url NetSurf URL to create new NetSurf URL from 325 * \param query Query string to use 326 * \param new_url Returns new NetSurf URL with query string provided 327 * \return NSERROR_OK on success, appropriate error otherwise 328 * 329 * If return value != NSERROR_OK, nothing will be returned in new_url. 330 * 331 * It is up to the client to call nsurl_unref when they are finished with 332 * the created object. 333 * 334 * Any query component in url is replaced with query in new_url. 335 * 336 * Passing the empty string as a replacement will result in the query 337 * component being removed. 338 */ 339 nserror nsurl_replace_query(const nsurl *url, const char *query, 340 nsurl **new_url); 341 342 343 /** 344 * Create a NetSurf URL object, with scheme replaced 345 * 346 * \param url NetSurf URL to create new NetSurf URL from 347 * \param scheme Scheme to use 348 * \param new_url Returns new NetSurf URL with scheme provided 349 * \return NSERROR_OK on success, appropriate error otherwise 350 * 351 * If return value != NSERROR_OK, nothing will be returned in new_url. 352 * 353 * It is up to the client to call nsurl_unref when they are finished with 354 * the created object. 355 * 356 * Any scheme component in url is replaced with scheme in new_url. 357 */ 358 nserror nsurl_replace_scheme(const nsurl *url, lwc_string *scheme, 359 nsurl **new_url); 360 361 362 /** 363 * Attempt to find a nice filename for a URL. 364 * 365 * \param url A NetSurf URL object to create a filename from 366 * \param result Updated to caller-owned string with filename 367 * \param remove_extensions remove any extensions from the filename 368 * \return NSERROR_OK on success, appropriate error otherwise 369 * 370 * Caller must ensure string result string is freed, if NSERROR_OK returned. 371 */ 372 nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions); 373 374 375 /** 376 * Create a NetSurf URL object for URL with parent location of an existing URL. 377 * 378 * \param url NetSurf URL to create new NetSurf URL from 379 * \param new_url Returns new NetSurf URL with parent URL path 380 * \return NSERROR_OK on success, appropriate error otherwise 381 * 382 * If return value != NSERROR_OK, nothing will be returned in new_url. 383 * 384 * It is up to the client to call nsurl_unref when they are finished with 385 * the created object. 386 * 387 * As well as stripping top most path segment, query and fragments are stripped. 388 */ 389 nserror nsurl_parent(const nsurl *url, nsurl **new_url); 390 391 /** 392 * Dump a NetSurf URL's internal components to stderr 393 * 394 * This is helper functionality for developers, and shouldn't be called 395 * generally. 396 * 397 * \param url The NetSurf URL to dump components of 398 */ 399 void nsurl_dump(const nsurl *url); 400 401 #endif 402