1 /*
2  * Copyright 2011 Michael Drake <tlsa@netsurf-browser.org>
3  *
4  * This file is part of NetSurf, http://www.netsurf-browser.org/
5  *
6  * NetSurf is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * NetSurf is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /**
20  * \file
21  * NetSurf URL handling implementation.
22  *
23  * This is the common implementation of all URL handling within the
24  * browser. This implementation is based upon RFC3986 although this has
 * been superseded by https://url.spec.whatwg.org/ which is based on
26  * actual contemporary implementations.
27  *
28  * Care must be taken with character encodings within this module as
29  * the specifications work with specific ascii ranges and must not be
30  * affected by locale. Hence the c library character type functions
31  * are not used.
32  */
33 
34 #include <assert.h>
35 #include <libwapcaplet/libwapcaplet.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <strings.h>
39 #include <inttypes.h>
40 
41 #include "utils/ascii.h"
42 #include "utils/corestrings.h"
43 #include "utils/errors.h"
44 #include "utils/idna.h"
45 #include "utils/log.h"
46 #include "utils/nsurl/private.h"
47 #include "utils/nsurl.h"
48 #include "utils/utils.h"
49 
50 
/**
 * Compare two component values.
 *
 * Sets match to false if the components are not the same.
 * Does nothing if the components are the same, so ensure match is
 * preset to true.
 *
 * When both components are present, match is handed to
 * lwc_string_isequal() to receive the comparison result.  If that call
 * does not return lwc_error_ok, or if only one of the two components is
 * present, match is set to false.  Two absent (NULL) components leave
 * match untouched.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and is safe inside unbraced if/else constructs.  The
 * arguments may be evaluated more than once, so they must be free of side
 * effects.
 */
#define nsurl__component_compare(c1, c2, match)				\
	do {								\
		if ((c1) && (c2) && lwc_error_ok ==			\
				lwc_string_isequal((c1), (c2), (match))) { \
			/* do nothing */				\
		} else if ((c1) || (c2)) {				\
			*(match) = false;				\
		}							\
	} while (0)
65 
66 
67 
68 /******************************************************************************
69  * NetSurf URL Public API                                                     *
70  ******************************************************************************/
71 
72 /* exported interface, documented in nsurl.h */
nsurl_ref(nsurl * url)73 nsurl *nsurl_ref(nsurl *url)
74 {
75 	assert(url != NULL);
76 
77 	url->count++;
78 
79 	return url;
80 }
81 
82 
83 /* exported interface, documented in nsurl.h */
nsurl_unref(nsurl * url)84 void nsurl_unref(nsurl *url)
85 {
86 	assert(url != NULL);
87 	assert(url->count > 0);
88 
89 	if (--url->count > 0)
90 		return;
91 
92 	/* Release lwc strings */
93 	nsurl__components_destroy(&url->components);
94 
95 	/* Free the NetSurf URL */
96 	free(url);
97 }
98 
99 
100 /* exported interface, documented in nsurl.h */
nsurl_compare(const nsurl * url1,const nsurl * url2,nsurl_component parts)101 bool nsurl_compare(const nsurl *url1, const nsurl *url2, nsurl_component parts)
102 {
103 	bool match = true;
104 
105 	assert(url1 != NULL);
106 	assert(url2 != NULL);
107 
108 	/* Compare URL components */
109 
110 	/* Path, host and query first, since they're most likely to differ */
111 
112 	if (parts & NSURL_PATH) {
113 		nsurl__component_compare(url1->components.path,
114 				url2->components.path, &match);
115 
116 		if (match == false)
117 			return false;
118 	}
119 
120 	if (parts & NSURL_HOST) {
121 		nsurl__component_compare(url1->components.host,
122 				url2->components.host, &match);
123 
124 		if (match == false)
125 			return false;
126 	}
127 
128 	if (parts & NSURL_QUERY) {
129 		nsurl__component_compare(url1->components.query,
130 				url2->components.query, &match);
131 
132 		if (match == false)
133 			return false;
134 	}
135 
136 	if (parts & NSURL_SCHEME) {
137 		nsurl__component_compare(url1->components.scheme,
138 				url2->components.scheme, &match);
139 
140 		if (match == false)
141 			return false;
142 	}
143 
144 	if (parts & NSURL_USERNAME) {
145 		nsurl__component_compare(url1->components.username,
146 				url2->components.username, &match);
147 
148 		if (match == false)
149 			return false;
150 	}
151 
152 	if (parts & NSURL_PASSWORD) {
153 		nsurl__component_compare(url1->components.password,
154 				url2->components.password, &match);
155 
156 		if (match == false)
157 			return false;
158 	}
159 
160 	if (parts & NSURL_PORT) {
161 		nsurl__component_compare(url1->components.port,
162 				url2->components.port, &match);
163 
164 		if (match == false)
165 			return false;
166 	}
167 
168 	if (parts & NSURL_FRAGMENT) {
169 		nsurl__component_compare(url1->components.fragment,
170 				url2->components.fragment, &match);
171 
172 		if (match == false)
173 			return false;
174 	}
175 
176 	return true;
177 }
178 
179 
180 /* exported interface, documented in nsurl.h */
nsurl_get(const nsurl * url,nsurl_component parts,char ** url_s,size_t * url_l)181 nserror nsurl_get(const nsurl *url, nsurl_component parts,
182 		char **url_s, size_t *url_l)
183 {
184 	assert(url != NULL);
185 
186 	return nsurl__components_to_string(&(url->components), parts, 0,
187 			url_s, url_l);
188 }
189 
190 
191 /* exported interface, documented in nsurl.h */
nsurl_get_component(const nsurl * url,nsurl_component part)192 lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part)
193 {
194 	assert(url != NULL);
195 
196 	switch (part) {
197 	case NSURL_SCHEME:
198 		return (url->components.scheme != NULL) ?
199 				lwc_string_ref(url->components.scheme) : NULL;
200 
201 	case NSURL_USERNAME:
202 		return (url->components.username != NULL) ?
203 				lwc_string_ref(url->components.username) : NULL;
204 
205 	case NSURL_PASSWORD:
206 		return (url->components.password != NULL) ?
207 				lwc_string_ref(url->components.password) : NULL;
208 
209 	case NSURL_HOST:
210 		return (url->components.host != NULL) ?
211 				lwc_string_ref(url->components.host) : NULL;
212 
213 	case NSURL_PORT:
214 		return (url->components.port != NULL) ?
215 				lwc_string_ref(url->components.port) : NULL;
216 
217 	case NSURL_PATH:
218 		return (url->components.path != NULL) ?
219 				lwc_string_ref(url->components.path) : NULL;
220 
221 	case NSURL_QUERY:
222 		return (url->components.query != NULL) ?
223 				lwc_string_ref(url->components.query) : NULL;
224 
225 	case NSURL_FRAGMENT:
226 		return (url->components.fragment != NULL) ?
227 				lwc_string_ref(url->components.fragment) : NULL;
228 
229 	default:
230 		NSLOG(netsurf, INFO,
231 		      "Unsupported value passed to part param.");
232 		assert(0);
233 	}
234 
235 	return NULL;
236 }
237 
238 
239 /* exported interface, documented in nsurl.h */
nsurl_get_scheme_type(const nsurl * url)240 enum nsurl_scheme_type nsurl_get_scheme_type(const nsurl *url)
241 {
242 	assert(url != NULL);
243 
244 	return url->components.scheme_type;
245 }
246 
247 
248 /* exported interface, documented in nsurl.h */
nsurl_has_component(const nsurl * url,nsurl_component part)249 bool nsurl_has_component(const nsurl *url, nsurl_component part)
250 {
251 	assert(url != NULL);
252 
253 	switch (part) {
254 	case NSURL_SCHEME:
255 		if (url->components.scheme != NULL)
256 			return true;
257 		else
258 			return false;
259 
260 	case NSURL_CREDENTIALS:
261 		/* Only username required for credentials section */
262 		/* Fall through */
263 	case NSURL_USERNAME:
264 		if (url->components.username != NULL)
265 			return true;
266 		else
267 			return false;
268 
269 	case NSURL_PASSWORD:
270 		if (url->components.password != NULL)
271 			return true;
272 		else
273 			return false;
274 
275 	case NSURL_HOST:
276 		if (url->components.host != NULL)
277 			return true;
278 		else
279 			return false;
280 
281 	case NSURL_PORT:
282 		if (url->components.port != NULL)
283 			return true;
284 		else
285 			return false;
286 
287 	case NSURL_PATH:
288 		if (url->components.path != NULL)
289 			return true;
290 		else
291 			return false;
292 
293 	case NSURL_QUERY:
294 		if (url->components.query != NULL)
295 			return true;
296 		else
297 			return false;
298 
299 	case NSURL_FRAGMENT:
300 		if (url->components.fragment != NULL)
301 			return true;
302 		else
303 			return false;
304 
305 	default:
306 		NSLOG(netsurf, INFO,
307 		      "Unsupported value passed to part param.");
308 		assert(0);
309 	}
310 
311 	return false;
312 }
313 
314 
315 /* exported interface, documented in nsurl.h */
nsurl_access(const nsurl * url)316 const char *nsurl_access(const nsurl *url)
317 {
318 	assert(url != NULL);
319 
320 	return url->string;
321 }
322 
323 
324 /* exported interface, documented in nsurl.h */
nsurl_access_log(const nsurl * url)325 const char *nsurl_access_log(const nsurl *url)
326 {
327 	assert(url != NULL);
328 
329 	if (url->components.scheme_type == NSURL_SCHEME_DATA) {
330 		return "[data url]";
331 	}
332 
333 	return url->string;
334 }
335 
336 
337 /* exported interface, documented in nsurl.h */
nsurl_get_utf8(const nsurl * url,char ** url_s,size_t * url_l)338 nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l)
339 {
340 	nserror err;
341 	lwc_string *host;
342 	char *idna_host = NULL;
343 	size_t idna_host_len;
344 	char *scheme = NULL;
345 	size_t scheme_len;
346 	char *path = NULL;
347 	size_t path_len;
348 
349 	assert(url != NULL);
350 
351 	if (url->components.host == NULL) {
352 		return nsurl_get(url, NSURL_WITH_FRAGMENT, url_s, url_l);
353 	}
354 
355 	host = url->components.host;
356 	err = idna_decode(lwc_string_data(host), lwc_string_length(host),
357 			&idna_host, &idna_host_len);
358 	if (err != NSERROR_OK) {
359 		goto cleanup;
360 	}
361 
362 	err = nsurl_get(url,
363 			NSURL_SCHEME | NSURL_CREDENTIALS,
364 			&scheme, &scheme_len);
365 	if (err != NSERROR_OK) {
366 		goto cleanup;
367 	}
368 
369 	err = nsurl_get(url,
370 			NSURL_PORT | NSURL_PATH | NSURL_QUERY | NSURL_FRAGMENT,
371 			&path, &path_len);
372 	if (err != NSERROR_OK) {
373 		goto cleanup;
374 	}
375 
376 	*url_l = scheme_len + idna_host_len + path_len + 1; /* +1 for \0 */
377 	*url_s = malloc(*url_l);
378 
379 	if (*url_s == NULL) {
380 		err = NSERROR_NOMEM;
381 		goto cleanup;
382 	}
383 
384 	snprintf(*url_s, *url_l, "%s%s%s", scheme, idna_host, path);
385 
386 	err = NSERROR_OK;
387 
388 cleanup:
389 	free(idna_host);
390 	free(scheme);
391 	free(path);
392 
393 	return err;
394 }
395 
396 
397 /* exported interface, documented in nsurl.h */
nsurl_access_leaf(const nsurl * url)398 const char *nsurl_access_leaf(const nsurl *url)
399 {
400 	size_t path_len;
401 	const char *path;
402 	const char *leaf;
403 
404 	assert(url != NULL);
405 
406 	if (url->components.path == NULL)
407 		return "";
408 
409 	path = lwc_string_data(url->components.path);
410 	path_len = lwc_string_length(url->components.path);
411 
412 	if (path_len == 0)
413 		return "";
414 
415 	if (path_len == 1 && *path == '/')
416 		return "/";
417 
418 	leaf = path + path_len;
419 
420 	do {
421 		leaf--;
422 	} while ((leaf != path) && (*leaf != '/'));
423 
424 	if (*leaf == '/')
425 		leaf++;
426 
427 	return leaf;
428 }
429 
430 
431 /* exported interface, documented in nsurl.h */
nsurl_length(const nsurl * url)432 size_t nsurl_length(const nsurl *url)
433 {
434 	assert(url != NULL);
435 
436 	return url->length;
437 }
438 
439 
440 /* exported interface, documented in nsurl.h */
nsurl_hash(const nsurl * url)441 uint32_t nsurl_hash(const nsurl *url)
442 {
443 	assert(url != NULL);
444 
445 	return url->hash;
446 }
447 
448 
449 /* exported interface, documented in nsurl.h */
nsurl_defragment(const nsurl * url,nsurl ** no_frag)450 nserror nsurl_defragment(const nsurl *url, nsurl **no_frag)
451 {
452 	size_t length;
453 	char *pos;
454 
455 	assert(url != NULL);
456 
457 	/* check for source url having no fragment already */
458 	if (url->components.fragment == NULL) {
459 		*no_frag = (nsurl *)url;
460 
461 		(*no_frag)->count++;
462 
463 		return NSERROR_OK;
464 	}
465 
466 	/* Find the change in length from url to new_url */
467 	length = url->length;
468 	if (url->components.fragment != NULL) {
469 		length -= 1 + lwc_string_length(url->components.fragment);
470 	}
471 
472 	/* Create NetSurf URL object */
473 	*no_frag = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */
474 	if (*no_frag == NULL) {
475 		return NSERROR_NOMEM;
476 	}
477 
478 	/* Copy components */
479 	(*no_frag)->components.scheme =
480 			nsurl__component_copy(url->components.scheme);
481 	(*no_frag)->components.username =
482 			nsurl__component_copy(url->components.username);
483 	(*no_frag)->components.password =
484 			nsurl__component_copy(url->components.password);
485 	(*no_frag)->components.host =
486 			nsurl__component_copy(url->components.host);
487 	(*no_frag)->components.port =
488 			nsurl__component_copy(url->components.port);
489 	(*no_frag)->components.path =
490 			nsurl__component_copy(url->components.path);
491 	(*no_frag)->components.query =
492 			nsurl__component_copy(url->components.query);
493 	(*no_frag)->components.fragment = NULL;
494 
495 	(*no_frag)->components.scheme_type = url->components.scheme_type;
496 
497 	(*no_frag)->length = length;
498 
499 	/* Fill out the url string */
500 	pos = (*no_frag)->string;
501 	memcpy(pos, url->string, length);
502 	pos += length;
503 	*pos = '\0';
504 
505 	/* Get the nsurl's hash */
506 	nsurl__calc_hash(*no_frag);
507 
508 	/* Give the URL a reference */
509 	(*no_frag)->count = 1;
510 
511 	return NSERROR_OK;
512 }
513 
514 
515 /* exported interface, documented in nsurl.h */
nsurl_refragment(const nsurl * url,lwc_string * frag,nsurl ** new_url)516 nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url)
517 {
518 	int frag_len;
519 	int base_len;
520 	char *pos;
521 	size_t len;
522 
523 	assert(url != NULL);
524 	assert(frag != NULL);
525 
526 	/* Find the change in length from url to new_url */
527 	base_len = url->length;
528 	if (url->components.fragment != NULL) {
529 		base_len -= 1 + lwc_string_length(url->components.fragment);
530 	}
531 	frag_len = lwc_string_length(frag);
532 
533 	/* Set new_url's length */
534 	len = base_len + 1 /* # */ + frag_len;
535 
536 	/* Create NetSurf URL object */
537 	*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
538 	if (*new_url == NULL) {
539 		return NSERROR_NOMEM;
540 	}
541 
542 	(*new_url)->length = len;
543 
544 	/* Set string */
545 	pos = (*new_url)->string;
546 	memcpy(pos, url->string, base_len);
547 	pos += base_len;
548 	*pos = '#';
549 	memcpy(++pos, lwc_string_data(frag), frag_len);
550 	pos += frag_len;
551 	*pos = '\0';
552 
553 	/* Copy components */
554 	(*new_url)->components.scheme =
555 			nsurl__component_copy(url->components.scheme);
556 	(*new_url)->components.username =
557 			nsurl__component_copy(url->components.username);
558 	(*new_url)->components.password =
559 			nsurl__component_copy(url->components.password);
560 	(*new_url)->components.host =
561 			nsurl__component_copy(url->components.host);
562 	(*new_url)->components.port =
563 			nsurl__component_copy(url->components.port);
564 	(*new_url)->components.path =
565 			nsurl__component_copy(url->components.path);
566 	(*new_url)->components.query =
567 			nsurl__component_copy(url->components.query);
568 	(*new_url)->components.fragment =
569 			lwc_string_ref(frag);
570 
571 	(*new_url)->components.scheme_type = url->components.scheme_type;
572 
573 	/* Get the nsurl's hash */
574 	nsurl__calc_hash(*new_url);
575 
576 	/* Give the URL a reference */
577 	(*new_url)->count = 1;
578 
579 	return NSERROR_OK;
580 }
581 
582 
583 /* exported interface, documented in nsurl.h */
nsurl_replace_query(const nsurl * url,const char * query,nsurl ** new_url)584 nserror nsurl_replace_query(const nsurl *url, const char *query,
585 		nsurl **new_url)
586 {
587 	int query_len;    /* Length of new query string excluding '?' */
588 	int frag_len = 0; /* Length of fragment, excluding '#' */
589 	int base_len;     /* Length of URL up to start of query */
590 	char *pos;        /* current position in output string */
591 	size_t length;    /* new url string length */
592 	lwc_string *lwc_query = NULL;
593 
594 	assert(url != NULL);
595 	assert(query != NULL);
596 
597 	length = query_len = strlen(query);
598 	if (query_len > 0) {
599 		length++; /* allow for '?' */
600 
601 		/* intern string */
602 		if (lwc_intern_string(query,
603 				      query_len,
604 				      &lwc_query) != lwc_error_ok) {
605 			return NSERROR_NOMEM;
606 		}
607 	}
608 
609 	/* Find the change in length from url to new_url */
610 	base_len = url->length;
611 	if (url->components.query != NULL) {
612 		base_len -= (1 + lwc_string_length(url->components.query));
613 	}
614 	if (url->components.fragment != NULL) {
615 		frag_len = lwc_string_length(url->components.fragment);
616 		base_len -= (1 + frag_len);
617 		length += frag_len + 1; /* allow for '#' */
618 	}
619 
620 	/* compute new url string length */
621 	length += base_len;
622 
623 	/* Create NetSurf URL object */
624 	*new_url = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */
625 	if (*new_url == NULL) {
626 		if (query_len > 0) {
627 			lwc_string_unref(lwc_query);
628 		}
629 		return NSERROR_NOMEM;
630 	}
631 
632 	(*new_url)->length = length;
633 
634 	/* Set string */
635 	pos = (*new_url)->string;
636 	memcpy(pos, url->string, base_len);
637 	pos += base_len;
638 	if (query_len > 0) {
639 		*pos = '?';
640 		memcpy(++pos, query, query_len);
641 		pos += query_len;
642 	}
643 	if (url->components.fragment != NULL) {
644 		const char *frag = lwc_string_data(url->components.fragment);
645 		*pos = '#';
646 		memcpy(++pos, frag, frag_len);
647 		pos += frag_len;
648 	}
649 	*pos = '\0';
650 
651 	/* Copy components */
652 	(*new_url)->components.scheme =
653 			nsurl__component_copy(url->components.scheme);
654 	(*new_url)->components.username =
655 			nsurl__component_copy(url->components.username);
656 	(*new_url)->components.password =
657 			nsurl__component_copy(url->components.password);
658 	(*new_url)->components.host =
659 			nsurl__component_copy(url->components.host);
660 	(*new_url)->components.port =
661 			nsurl__component_copy(url->components.port);
662 	(*new_url)->components.path =
663 			nsurl__component_copy(url->components.path);
664 	(*new_url)->components.query = lwc_query;
665 	(*new_url)->components.fragment =
666 			nsurl__component_copy(url->components.fragment);
667 
668 	(*new_url)->components.scheme_type = url->components.scheme_type;
669 
670 	/* Get the nsurl's hash */
671 	nsurl__calc_hash(*new_url);
672 
673 	/* Give the URL a reference */
674 	(*new_url)->count = 1;
675 
676 	return NSERROR_OK;
677 }
678 
679 
680 /* exported interface, documented in nsurl.h */
nsurl_replace_scheme(const nsurl * url,lwc_string * scheme,nsurl ** new_url)681 nserror nsurl_replace_scheme(const nsurl *url, lwc_string *scheme,
682 		nsurl **new_url)
683 {
684 	int scheme_len;
685 	int base_len;
686 	char *pos;
687 	size_t len;
688 	bool match;
689 
690 	assert(url != NULL);
691 	assert(scheme != NULL);
692 
693 	/* Get the length of the new scheme */
694 	scheme_len = lwc_string_length(scheme);
695 
696 	/* Find the change in length from url to new_url */
697 	base_len = url->length;
698 	if (url->components.scheme != NULL) {
699 		base_len -= lwc_string_length(url->components.scheme);
700 	}
701 
702 	/* Set new_url's length */
703 	len = base_len + scheme_len;
704 
705 	/* Create NetSurf URL object */
706 	*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
707 	if (*new_url == NULL) {
708 		return NSERROR_NOMEM;
709 	}
710 
711 	(*new_url)->length = len;
712 
713 	/* Set string */
714 	pos = (*new_url)->string;
715 	memcpy(pos, lwc_string_data(scheme), scheme_len);
716 	memcpy(pos + scheme_len,
717 			url->string + url->length - base_len, base_len);
718 	pos[len] = '\0';
719 
720 	/* Copy components */
721 	(*new_url)->components.scheme = lwc_string_ref(scheme);
722 	(*new_url)->components.username =
723 			nsurl__component_copy(url->components.username);
724 	(*new_url)->components.password =
725 			nsurl__component_copy(url->components.password);
726 	(*new_url)->components.host =
727 			nsurl__component_copy(url->components.host);
728 	(*new_url)->components.port =
729 			nsurl__component_copy(url->components.port);
730 	(*new_url)->components.path =
731 			nsurl__component_copy(url->components.path);
732 	(*new_url)->components.query =
733 			nsurl__component_copy(url->components.query);
734 	(*new_url)->components.fragment =
735 			nsurl__component_copy(url->components.fragment);
736 
737 	/* Compute new scheme type */
738 	if (lwc_string_caseless_isequal(scheme, corestring_lwc_http,
739 			&match) == lwc_error_ok && match == true) {
740 		(*new_url)->components.scheme_type = NSURL_SCHEME_HTTP;
741 	} else if (lwc_string_caseless_isequal(scheme, corestring_lwc_https,
742 			&match) == lwc_error_ok && match == true) {
743 		(*new_url)->components.scheme_type = NSURL_SCHEME_HTTPS;
744 	} else if (lwc_string_caseless_isequal(scheme, corestring_lwc_file,
745 			&match) == lwc_error_ok && match == true) {
746 		(*new_url)->components.scheme_type = NSURL_SCHEME_FILE;
747 	} else if (lwc_string_caseless_isequal(scheme, corestring_lwc_ftp,
748 			&match) == lwc_error_ok && match == true) {
749 		(*new_url)->components.scheme_type = NSURL_SCHEME_FTP;
750 	} else if (lwc_string_caseless_isequal(scheme, corestring_lwc_mailto,
751 			&match) == lwc_error_ok && match == true) {
752 		(*new_url)->components.scheme_type = NSURL_SCHEME_MAILTO;
753 	} else {
754 		(*new_url)->components.scheme_type = NSURL_SCHEME_OTHER;
755 	}
756 
757 	/* Get the nsurl's hash */
758 	nsurl__calc_hash(*new_url);
759 
760 	/* Give the URL a reference */
761 	(*new_url)->count = 1;
762 
763 	return NSERROR_OK;
764 }
765 
766 
767 /* exported interface documented in utils/nsurl.h */
nsurl_nice(const nsurl * url,char ** result,bool remove_extensions)768 nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions)
769 {
770 	const char *data;
771 	size_t len;
772 	size_t pos;
773 	bool match;
774 	char *name;
775 
776 	assert(url != NULL);
777 
778 	*result = 0;
779 
780 	/* extract the last component of the path, if possible */
781 	if ((url->components.path != NULL) &&
782 	    (lwc_string_length(url->components.path) != 0) &&
783 	    (lwc_string_isequal(url->components.path,
784 			corestring_lwc_slash_, &match) == lwc_error_ok) &&
785 	    (match == false)) {
786 		bool first = true;
787 		bool keep_looking;
788 
789 		/* Get hold of the string data we're examining */
790 		data = lwc_string_data(url->components.path);
791 		len = lwc_string_length(url->components.path);
792 		pos = len;
793 
794 		do {
795 			keep_looking = false;
796 			pos--;
797 
798 			/* Find last '/' with stuff after it */
799 			while (pos != 0) {
800 				if (data[pos] == '/' && pos < len - 1) {
801 					break;
802 				}
803 				pos--;
804 			}
805 
806 			if (pos == 0) {
807 				break;
808 			}
809 
810 			if (first) {
811 				if (strncasecmp("/default.", data + pos,
812 						SLEN("/default.")) == 0) {
813 					keep_looking = true;
814 
815 				} else if (strncasecmp("/index.",
816 							data + pos,
817 							6) == 0) {
818 					keep_looking = true;
819 
820 				}
821 				first = false;
822 			}
823 
824 		} while (keep_looking);
825 
826 		if (data[pos] == '/')
827 			pos++;
828 
829 		if (strncasecmp("default.", data + pos, 8) != 0 &&
830 				strncasecmp("index.", data + pos, 6) != 0) {
831 			size_t end = pos;
832 			while (data[end] != '\0' && data[end] != '/') {
833 				end++;
834 			}
835 			if (end - pos != 0) {
836 				name = malloc(end - pos + 1);
837 				if (name == NULL) {
838 					return NSERROR_NOMEM;
839 				}
840 				memcpy(name, data + pos, end - pos);
841 				name[end - pos] = '\0';
842 				if (remove_extensions) {
843 					/* strip any extenstion */
844 					char *dot = strchr(name, '.');
845 					if (dot && dot != name) {
846 						*dot = '\0';
847 					}
848 				}
849 				*result = name;
850 				return NSERROR_OK;
851 			}
852 		}
853 	}
854 
855 	if (url->components.host != NULL) {
856 		name = strdup(lwc_string_data(url->components.host));
857 
858 		for (pos = 0; name[pos] != '\0'; pos++) {
859 			if (name[pos] == '.') {
860 				name[pos] = '_';
861 			}
862 		}
863 
864 		*result = name;
865 		return NSERROR_OK;
866 	}
867 
868 	return NSERROR_NOT_FOUND;
869 }
870 
871 
872 /* exported interface, documented in nsurl.h */
nsurl_parent(const nsurl * url,nsurl ** new_url)873 nserror nsurl_parent(const nsurl *url, nsurl **new_url)
874 {
875 	lwc_string *lwc_path;
876 	size_t old_path_len, new_path_len;
877 	size_t len;
878 	const char* path = NULL;
879 	char *pos;
880 
881 	assert(url != NULL);
882 
883 	old_path_len = (url->components.path == NULL) ? 0 :
884 			lwc_string_length(url->components.path);
885 
886 	/* Find new path length */
887 	if (old_path_len == 0) {
888 		new_path_len = old_path_len;
889 	} else {
890 		path = lwc_string_data(url->components.path);
891 
892 		new_path_len = old_path_len;
893 		if (old_path_len > 1) {
894 			/* Skip over any trailing / */
895 			if (path[new_path_len - 1] == '/')
896 				new_path_len--;
897 
898 			/* Work back to next / */
899 			while (new_path_len > 0 &&
900 					path[new_path_len - 1] != '/')
901 				new_path_len--;
902 		}
903 	}
904 
905 	/* Find the length of new_url */
906 	len = url->length;
907 	if (url->components.query != NULL) {
908 		len -= lwc_string_length(url->components.query);
909 	}
910 	if (url->components.fragment != NULL) {
911 		len -= 1; /* # */
912 		len -= lwc_string_length(url->components.fragment);
913 	}
914 	len -= old_path_len - new_path_len;
915 
916 	/* Create NetSurf URL object */
917 	*new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */
918 	if (*new_url == NULL) {
919 		return NSERROR_NOMEM;
920 	}
921 
922 	/* Make new path */
923 	if (old_path_len == 0) {
924 		lwc_path = NULL;
925 	} else if (old_path_len == new_path_len) {
926 		lwc_path = lwc_string_ref(url->components.path);
927 	} else {
928 		if (lwc_intern_string(path, new_path_len,
929 				&lwc_path) != lwc_error_ok) {
930 			free(*new_url);
931 			return NSERROR_NOMEM;
932 		}
933 	}
934 
935 	(*new_url)->length = len;
936 
937 	/* Set string */
938 	pos = (*new_url)->string;
939 	memcpy(pos, url->string, len);
940 	pos += len;
941 	*pos = '\0';
942 
943 	/* Copy components */
944 	(*new_url)->components.scheme =
945 			nsurl__component_copy(url->components.scheme);
946 	(*new_url)->components.username =
947 			nsurl__component_copy(url->components.username);
948 	(*new_url)->components.password =
949 			nsurl__component_copy(url->components.password);
950 	(*new_url)->components.host =
951 			nsurl__component_copy(url->components.host);
952 	(*new_url)->components.port =
953 			nsurl__component_copy(url->components.port);
954 	(*new_url)->components.path = lwc_path;
955 	(*new_url)->components.query = NULL;
956 	(*new_url)->components.fragment = NULL;
957 
958 	(*new_url)->components.scheme_type = url->components.scheme_type;
959 
960 	/* Get the nsurl's hash */
961 	nsurl__calc_hash(*new_url);
962 
963 	/* Give the URL a reference */
964 	(*new_url)->count = 1;
965 
966 	return NSERROR_OK;
967 }
968 
969 /* exported interface, documented in nsurl.h */
nsurl_dump(const nsurl * url)970 void nsurl_dump(const nsurl *url)
971 {
972 	fprintf(stderr, "nsurl components for %p "
973 			"(refs: %i hash: %"PRIx32"):\n",
974 			url, url->count, url->hash);
975 
976 	if (url->components.scheme)
977 		fprintf(stderr, "  Scheme: %s\n",
978 				lwc_string_data(url->components.scheme));
979 	if (url->components.username)
980 		fprintf(stderr, "Username: %s\n",
981 				lwc_string_data(url->components.username));
982 	if (url->components.password)
983 		fprintf(stderr, "Password: %s\n",
984 				lwc_string_data(url->components.password));
985 	if (url->components.host)
986 		fprintf(stderr, "    Host: %s\n",
987 				lwc_string_data(url->components.host));
988 	if (url->components.port)
989 		fprintf(stderr, "    Port: %s\n",
990 				lwc_string_data(url->components.port));
991 	if (url->components.path)
992 		fprintf(stderr, "    Path: %s\n",
993 				lwc_string_data(url->components.path));
994 	if (url->components.query)
995 		fprintf(stderr, "   Query: %s\n",
996 				lwc_string_data(url->components.query));
997 	if (url->components.fragment)
998 		fprintf(stderr, "Fragment: %s\n",
999 				lwc_string_data(url->components.fragment));
1000 }
1001