1 /*
2 * Copyright 2011 Michael Drake <tlsa@netsurf-browser.org>
3 *
4 * This file is part of NetSurf, http://www.netsurf-browser.org/
5 *
6 * NetSurf is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 of the License.
9 *
10 * NetSurf is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 /**
20 * \file
21 * NetSurf URL handling implementation.
22 *
23 * This is the common implementation of all URL handling within the
24 * browser. This implementation is based upon RFC3986 although this has
 * been superseded by https://url.spec.whatwg.org/ which is based on
26 * actual contemporary implementations.
27 *
28 * Care must be taken with character encodings within this module as
29 * the specifications work with specific ascii ranges and must not be
30 * affected by locale. Hence the c library character type functions
31 * are not used.
32 */
33
34 #include <assert.h>
35 #include <libwapcaplet/libwapcaplet.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <strings.h>
39
40 #include "netsurf/inttypes.h"
41
42 #include "utils/ascii.h"
43 #include "utils/corestrings.h"
44 #include "utils/errors.h"
45 #include "utils/idna.h"
46 #include "utils/log.h"
47 #include "utils/nsurl.h"
48 #include "utils/nsurl/private.h"
49 #include "utils/utils.h"
50
51
52 /** Marker set, indicating positions of sections within a URL string */
struct url_markers {
	/* All markers are byte offsets from the beginning of the URL string
	 * that was scanned by nsurl__get_string_markers(). */

	size_t start; /** start of URL (first non-whitespace character) */
	size_t scheme_end; /** ':' ending the scheme; == start if no scheme */
	size_t authority; /** where the authority section begins */

	size_t colon_first; /** first ':' in authority; == authority if none */
	size_t at; /** first '@' in authority; == authority if none */
	size_t colon_last; /** latest ':' after the first; == authority if none */

	size_t path; /** where the path section begins */
	size_t query; /** where the path ends ('?' if there is a query) */
	size_t fragment; /** where the query ends ('#' if there is a fragment) */

	size_t end; /** end of URL (one past last non-whitespace character) */

	enum nsurl_scheme_type scheme_type; /** scheme class detected in scan */
};
70
71
72 /** Sections of a URL */
enum url_sections {
	URL_SCHEME, /* from the start marker to the scheme_end marker */
	URL_CREDENTIALS, /* from the authority marker to the at marker */
	URL_HOST, /* from the at marker to the path marker (host and port) */
	URL_PATH, /* from the path marker to the query marker */
	URL_QUERY, /* from the query marker to the fragment marker */
	URL_FRAGMENT /* from the fragment marker to the end marker */
};
81
82
83 /**
84 * Return a hex digit for the given numerical value.
85 *
86 * \param digit the value to get the hex digit for.
87 * \return character in range 0-9A-F
88 */
static inline char digit2uppercase_hex(unsigned char digit)
{
	/* Nibble value -> upper case hexadecimal digit.  A fixed table is
	 * used so the result can never be influenced by the C locale. */
	static const char hex_digits[] = "0123456789ABCDEF";

	/* Callers must only pass a single nibble (0..15) */
	assert(digit < 16);

	return hex_digits[digit];
}
93
94 /**
95 * determine if a character is unreserved
96 *
97 * \param c character to classify.
98 * \return true if the character is unreserved else false.
99 */
static bool nsurl__is_unreserved(unsigned char c)
{
	/* From RFC3986 section 2.3 (unreserved characters)
	 *
	 *      unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
	 *
	 * Explicit ASCII code points are tested, rather than using the C
	 * library character classification, so that the answer cannot be
	 * affected by locale.
	 */

	/* DIGIT: '0' to '9' */
	if (c >= 0x30 && c <= 0x39)
		return true;

	/* ALPHA (upper case): 'A' to 'Z' */
	if (c >= 0x41 && c <= 0x5A)
		return true;

	/* ALPHA (lower case): 'a' to 'z' */
	if (c >= 0x61 && c <= 0x7A)
		return true;

	switch (c) {
	case 0x2D: /* '-' */
	case 0x2E: /* '.' */
	case 0x5F: /* '_' */
	case 0x7E: /* '~' */
		return true;
	default:
		/* Everything else, including all bytes >= 0x80 */
		return false;
	}
}
143
144 /**
 * Determine whether a character may be left unescaped in a URL.
 *
 * Any character for which this returns false must be percent escaped.
148 *
149 * \param c character to classify.
150 * \return true if the character should not be escaped else false.
151 */
static bool nsurl__is_no_escape(unsigned char c)
{
	/* Control characters, space, DEL and every byte outside the ASCII
	 * range must always be percent escaped. */
	if (c <= 0x20 || c >= 0x7F)
		return false;

	/* Of the printable ASCII characters, only these need escaping.
	 * Explicit code points are used so that the result does not depend
	 * on locale. */
	switch (c) {
	case 0x22: /* double quote */
	case 0x25: /* percent */
	case 0x3C: /* less than */
	case 0x3E: /* greater than */
	case 0x5C: /* backslash */
	case 0x5E: /* caret */
	case 0x60: /* backtick */
	case 0x7B: /* left brace */
	case 0x7D: /* right brace */
		return false;
	default:
		return true;
	}
}
190
191
192 /**
193 * Obtains a set of markers delimiting sections in a URL string
194 *
195 * \param url_s URL string
196 * \param markers Updated to mark sections in the URL string
197 * \param joining True iff URL string is a relative URL for joining
198 */
nsurl__get_string_markers(const char * const url_s,struct url_markers * markers,bool joining)199 static void nsurl__get_string_markers(const char * const url_s,
200 struct url_markers *markers, bool joining)
201 {
202 const char *pos = url_s; /** current position in url_s */
203 bool is_http = false;
204 bool trailing_whitespace = false;
205
206 /* Initialise marker set */
207 struct url_markers marker = { 0, 0, 0, 0, 0, 0,
208 0, 0, 0, 0, NSURL_SCHEME_OTHER };
209
210 /* Skip any leading whitespace in url_s */
211 while (ascii_is_space(*pos))
212 pos++;
213
214 /* Record start point */
215 marker.start = pos - url_s;
216
217 marker.scheme_end = marker.authority = marker.colon_first = marker.at =
218 marker.colon_last = marker.path = marker.start;
219
220 if (*pos == '\0') {
221 /* Nothing but whitespace, early exit */
222 marker.query = marker.fragment = marker.end = marker.path;
223 *markers = marker;
224 return;
225 }
226
227 /* Get scheme */
228 if (ascii_is_alpha(*pos)) {
229 pos++;
230
231 while (*pos != ':' && *pos != '\0') {
232 if (!ascii_is_alphanumerical(*pos) && (*pos != '+') &&
233 (*pos != '-') && (*pos != '.')) {
234 /* This character is not valid in the
235 * scheme */
236 break;
237 }
238 pos++;
239 }
240
241 if (*pos == ':') {
242 /* This delimits the end of the scheme */
243 size_t off;
244
245 marker.scheme_end = pos - url_s;
246
247 off = marker.scheme_end - marker.start;
248
249 /* Detect http(s) and mailto for scheme specifc
250 * normalisation */
251 if (off == SLEN("http") &&
252 (((*(pos - off + 0) == 'h') ||
253 (*(pos - off + 0) == 'H')) &&
254 ((*(pos - off + 1) == 't') ||
255 (*(pos - off + 1) == 'T')) &&
256 ((*(pos - off + 2) == 't') ||
257 (*(pos - off + 2) == 'T')) &&
258 ((*(pos - off + 3) == 'p') ||
259 (*(pos - off + 3) == 'P')))) {
260 marker.scheme_type = NSURL_SCHEME_HTTP;
261 is_http = true;
262 } else if (off == SLEN("https") &&
263 (((*(pos - off + 0) == 'h') ||
264 (*(pos - off + 0) == 'H')) &&
265 ((*(pos - off + 1) == 't') ||
266 (*(pos - off + 1) == 'T')) &&
267 ((*(pos - off + 2) == 't') ||
268 (*(pos - off + 2) == 'T')) &&
269 ((*(pos - off + 3) == 'p') ||
270 (*(pos - off + 3) == 'P')) &&
271 ((*(pos - off + 4) == 's') ||
272 (*(pos - off + 4) == 'S')))) {
273 marker.scheme_type = NSURL_SCHEME_HTTPS;
274 is_http = true;
275 } else if (off == SLEN("file") &&
276 (((*(pos - off + 0) == 'f') ||
277 (*(pos - off + 0) == 'F')) &&
278 ((*(pos - off + 1) == 'i') ||
279 (*(pos - off + 1) == 'I')) &&
280 ((*(pos - off + 2) == 'l') ||
281 (*(pos - off + 2) == 'L')) &&
282 ((*(pos - off + 3) == 'e') ||
283 (*(pos - off + 3) == 'E')))) {
284 marker.scheme_type = NSURL_SCHEME_FILE;
285 } else if (off == SLEN("ftp") &&
286 (((*(pos - off + 0) == 'f') ||
287 (*(pos - off + 0) == 'F')) &&
288 ((*(pos - off + 1) == 't') ||
289 (*(pos - off + 1) == 'T')) &&
290 ((*(pos - off + 2) == 'p') ||
291 (*(pos - off + 2) == 'P')))) {
292 marker.scheme_type = NSURL_SCHEME_FTP;
293 } else if (off == SLEN("mailto") &&
294 (((*(pos - off + 0) == 'm') ||
295 (*(pos - off + 0) == 'M')) &&
296 ((*(pos - off + 1) == 'a') ||
297 (*(pos - off + 1) == 'A')) &&
298 ((*(pos - off + 2) == 'i') ||
299 (*(pos - off + 2) == 'I')) &&
300 ((*(pos - off + 3) == 'l') ||
301 (*(pos - off + 3) == 'L')) &&
302 ((*(pos - off + 4) == 't') ||
303 (*(pos - off + 4) == 'T')) &&
304 ((*(pos - off + 5) == 'o') ||
305 (*(pos - off + 5) == 'O')))) {
306 marker.scheme_type = NSURL_SCHEME_MAILTO;
307 } else if (off == SLEN("data") &&
308 (((*(pos - off + 0) == 'd') ||
309 (*(pos - off + 0) == 'D')) &&
310 ((*(pos - off + 1) == 'a') ||
311 (*(pos - off + 1) == 'A')) &&
312 ((*(pos - off + 2) == 't') ||
313 (*(pos - off + 2) == 'T')) &&
314 ((*(pos - off + 3) == 'a') ||
315 (*(pos - off + 3) == 'A')))) {
316 marker.scheme_type = NSURL_SCHEME_DATA;
317 }
318
319 /* Skip over colon */
320 pos++;
321
322 /* Mark place as start of authority */
323 marker.authority = marker.colon_first = marker.at =
324 marker.colon_last = marker.path =
325 pos - url_s;
326
327 } else {
328 /* Not found a scheme */
329 if (joining == false) {
330 /* Assuming no scheme == http */
331 marker.scheme_type = NSURL_SCHEME_HTTP;
332 is_http = true;
333 }
334 }
335 }
336
337 /* Get authority
338 *
339 * Two slashes always indicates the start of an authority.
340 *
341 * We are more relaxed in the case of http:
342 * a. when joining, one or more slashes indicates start of authority
343 * b. when not joining, we assume authority if no scheme was present
344 * and in the case of mailto: when we assume there is an authority.
345 */
346 if ((*pos == '/' && *(pos + 1) == '/') ||
347 (is_http && ((joining && *pos == '/') ||
348 (joining == false &&
349 marker.scheme_end != marker.start))) ||
350 marker.scheme_type == NSURL_SCHEME_MAILTO) {
351
352 /* Skip over leading slashes */
353 if (*pos == '/') {
354 if (is_http == false) {
355 if (*pos == '/') pos++;
356 if (*pos == '/') pos++;
357 } else {
358 while (*pos == '/')
359 pos++;
360 }
361
362 marker.authority = marker.colon_first = marker.at =
363 marker.colon_last = marker.path =
364 pos - url_s;
365 }
366
367 /* Need to get (or complete) the authority */
368 while (*pos != '\0') {
369 if (*pos == '/' || *pos == '?' || *pos == '#') {
370 /* End of the authority */
371 break;
372
373 } else if (marker.scheme_type != NSURL_SCHEME_MAILTO &&
374 *pos == ':' && marker.colon_first ==
375 marker.authority) {
376 /* could be username:password or host:port
377 * separator */
378 marker.colon_first = pos - url_s;
379
380 } else if (marker.scheme_type != NSURL_SCHEME_MAILTO &&
381 *pos == ':' && marker.colon_first !=
382 marker.authority) {
383 /* could be host:port separator */
384 marker.colon_last = pos - url_s;
385
386 } else if (*pos == '@' && marker.at ==
387 marker.authority) {
388 /* Credentials @ host separator */
389 marker.at = pos - url_s;
390 }
391
392 pos++;
393 }
394
395 marker.path = pos - url_s;
396
397 } else if ((*pos == '\0' || *pos == '/') &&
398 joining == false && is_http == true) {
399 marker.path = pos - url_s;
400 }
401
402 /* Get path
403 *
404 * Needs to start with '/' if there's no authority
405 */
406 if (*pos == '/' || ((marker.path == marker.authority) &&
407 (*pos != '?') && (*pos != '#') && (*pos != '\0'))) {
408 while (*(++pos) != '\0') {
409 if (*pos == '?' || *pos == '#') {
410 /* End of the path */
411 break;
412 }
413 }
414 }
415
416 marker.query = pos - url_s;
417
418 /* Get query */
419 if (*pos == '?') {
420 while (*(++pos) != '\0') {
421 if (*pos == '#') {
422 /* End of the query */
423 break;
424 }
425 }
426 }
427
428 marker.fragment = pos - url_s;
429
430 /* Get fragment */
431 if (*pos == '#') {
432 while (*(++pos) != '\0')
433 ;
434 }
435
436 /* We got to the end of url_s.
437 * Need to skip back over trailing whitespace to find end of URL */
438 pos--;
439 if (pos >= url_s && ascii_is_space(*pos)) {
440 trailing_whitespace = true;
441 while (pos >= url_s && ascii_is_space(*pos))
442 pos--;
443 }
444
445 marker.end = pos + 1 - url_s;
446
447 if (trailing_whitespace == true) {
448 /* Ensure last url section doesn't pass end */
449 if (marker.fragment > marker.end)
450 marker.fragment = marker.end;
451 if (marker.query > marker.end)
452 marker.query = marker.end;
453 if (marker.path > marker.end)
454 marker.path = marker.end;
455 if (marker.colon_last > marker.end)
456 marker.colon_last = marker.end;
457 if (marker.at > marker.end)
458 marker.at = marker.end;
459 if (marker.colon_last > marker.end)
460 marker.colon_last = marker.end;
461 if (marker.fragment > marker.end)
462 marker.fragment = marker.end;
463 }
464
465 NSLOG(netsurf, DEEPDEBUG,
466 "marker.start: %"PRIsizet, marker.start);
467 NSLOG(netsurf, DEEPDEBUG,
468 "marker.scheme_end: %"PRIsizet, marker.scheme_end);
469 NSLOG(netsurf, DEEPDEBUG,
470 "marker.authority: %"PRIsizet, marker.authority);
471
472 NSLOG(netsurf, DEEPDEBUG,
473 "marker.colon_first: %"PRIsizet, marker.colon_first);
474 NSLOG(netsurf, DEEPDEBUG,
475 "marker.at: %"PRIsizet, marker.at);
476 NSLOG(netsurf, DEEPDEBUG,
477 "marker.colon_last: %"PRIsizet, marker.colon_last);
478
479 NSLOG(netsurf, DEEPDEBUG,
480 "marker.path: %"PRIsizet, marker.path);
481 NSLOG(netsurf, DEEPDEBUG,
482 "marker.query: %"PRIsizet, marker.query);
483 NSLOG(netsurf, DEEPDEBUG,
484 "marker.fragment: %"PRIsizet, marker.fragment);
485
486 NSLOG(netsurf, DEEPDEBUG,
487 "marker.end: %"PRIsizet, marker.end);
488
489 /* Got all the URL components pegged out now */
490 *markers = marker;
491 }
492
493
494 /**
495 * Remove dot segments from a path, as per rfc 3986, 5.2.4
496 *
497 * \param path path to remove dot segments from ('\0' terminated)
498 * \param output path with dot segments removed
499 * \return size of output
500 */
nsurl__remove_dot_segments(char * path,char * output)501 static size_t nsurl__remove_dot_segments(char *path, char *output)
502 {
503 char *path_pos = path;
504 char *output_pos = output;
505
506 while (*path_pos != '\0') {
507 NSLOG(netsurf, DEEPDEBUG, " in:%s", path_pos);
508 NSLOG(netsurf, DEEPDEBUG, "out:%.*s",
509 (int)(output_pos - output), output);
510
511 if (*path_pos == '.') {
512 if (*(path_pos + 1) == '.' &&
513 *(path_pos + 2) == '/') {
514 /* Found prefix of "../" */
515 path_pos += SLEN("../");
516 continue;
517
518 } else if (*(path_pos + 1) == '/') {
519 /* Found prefix of "./" */
520 path_pos += SLEN("./");
521 continue;
522 }
523 } else if (*path_pos == '/' && *(path_pos + 1) == '.') {
524 if (*(path_pos + 2) == '/') {
525 /* Found prefix of "/./" */
526 path_pos += SLEN("/.");
527 continue;
528
529 } else if (*(path_pos + 2) == '\0') {
530 /* Found "/." at end of path */
531 *(output_pos++) = '/';
532
533 /* End of input path */
534 break;
535
536 } else if (*(path_pos + 2) == '.') {
537 if (*(path_pos + 3) == '/') {
538 /* Found prefix of "/../" */
539 path_pos += SLEN("/..");
540
541 if (output_pos > output)
542 output_pos--;
543 while (output_pos > output &&
544 *output_pos != '/')
545 output_pos--;
546
547 continue;
548
549 } else if (*(path_pos + 3) == '\0') {
550 /* Found "/.." at end of path */
551
552 while (output_pos > output &&
553 *(output_pos -1 ) !='/')
554 output_pos--;
555
556 /* End of input path */
557 break;
558 }
559 }
560 } else if (*path_pos == '.') {
561 if (*(path_pos + 1) == '\0') {
562 /* Found "." at end of path */
563
564 /* End of input path */
565 break;
566
567 } else if (*(path_pos + 1) == '.' &&
568 *(path_pos + 2) == '\0') {
569 /* Found ".." at end of path */
570
571 /* End of input path */
572 break;
573 }
574 }
575 /* Copy first character into output path */
576 *output_pos++ = *path_pos++;
577
578 /* Copy up to but not including next '/' */
579 while ((*path_pos != '/') && (*path_pos != '\0'))
580 *output_pos++ = *path_pos++;
581 }
582
583 return output_pos - output;
584 }
585
586
587 /**
588 * Get the length of the longest section
589 *
590 * \param m markers delimiting url sections in a string
591 * \return the length of the longest section
592 */
nsurl__get_longest_section(struct url_markers * m)593 static size_t nsurl__get_longest_section(struct url_markers *m)
594 {
595 size_t length = m->scheme_end - m->start; /* scheme */
596
597 if (length < m->at - m->authority) /* credentials */
598 length = m->at - m->authority;
599
600 if (length < m->path - m->at) /* host */
601 length = m->path - m->at;
602
603 if (length < m->query - m->path) /* path */
604 length = m->query - m->path;
605
606 if (length < m->fragment - m->query) /* query */
607 length = m->fragment - m->query;
608
609 if (length < m->end - m->fragment) /* fragment */
610 length = m->end - m->fragment;
611
612 return length;
613 }
614
615
616 /**
617 * Create the components of a NetSurf URL object for a section of a URL string
618 *
619 * \param url_s URL string
620 * \param section Sets which section of URL string is to be normalised
621 * \param pegs Set of markers delimiting the URL string's sections
622 * \param pos_norm A buffer large enough for the normalised string (*3 + 1)
623 * \param url A NetSurf URL object, to which components may be added
624 * \return NSERROR_OK on success, appropriate error otherwise
625 *
626 * The section of url_s is normalised appropriately.
627 */
nsurl__create_from_section(const char * const url_s,const enum url_sections section,const struct url_markers * pegs,char * pos_norm,struct nsurl_components * url)628 static nserror nsurl__create_from_section(const char * const url_s,
629 const enum url_sections section,
630 const struct url_markers *pegs,
631 char *pos_norm,
632 struct nsurl_components *url)
633 {
634 nserror ret;
635 int ascii_offset;
636 int start = 0;
637 int end = 0;
638 const char *pos;
639 const char *pos_url_s;
640 char *norm_start = pos_norm;
641 char *host;
642 size_t copy_len;
643 size_t length;
644 size_t host_len;
645 enum {
646 NSURL_F_NO_PORT = (1 << 0)
647 } flags = 0;
648
649 switch (section) {
650 case URL_SCHEME:
651 start = pegs->start;
652 end = pegs->scheme_end;
653 break;
654
655 case URL_CREDENTIALS:
656 start = pegs->authority;
657 end = pegs->at;
658 break;
659
660 case URL_HOST:
661 start = (pegs->at == pegs->authority &&
662 *(url_s + pegs->at) != '@') ?
663 pegs->at :
664 pegs->at + 1;
665 end = pegs->path;
666 break;
667
668 case URL_PATH:
669 start = pegs->path;
670 end = pegs->query;
671 break;
672
673 case URL_QUERY:
674 start = (*(url_s + pegs->query) != '?') ?
675 pegs->query :
676 pegs->query + 1;
677 end = pegs->fragment;
678 break;
679
680 case URL_FRAGMENT:
681 start = (*(url_s + pegs->fragment) != '#') ?
682 pegs->fragment :
683 pegs->fragment + 1;
684 end = pegs->end;
685 break;
686 }
687
688 if (end < start)
689 end = start;
690
691 length = end - start;
692
693 /* Stage 1: Normalise the required section */
694
695 pos = pos_url_s = url_s + start;
696 copy_len = 0;
697 for (; pos < url_s + end; pos++) {
698 if (*pos == '%' && (pos + 2 < url_s + end)) {
699 /* Might be an escaped character needing unescaped */
700
701 /* Find which character which was escaped */
702 ascii_offset = ascii_hex_to_value_2_chars(*(pos + 1),
703 *(pos + 2));
704
705 if (ascii_offset < 0) {
706 /* % with invalid hex digits. */
707 copy_len++;
708 continue;
709 }
710
711 if ((section != URL_SCHEME && section != URL_HOST) &&
712 (nsurl__is_unreserved(ascii_offset) == false)) {
713 /* This character should be escaped after all,
714 * just let it get copied */
715 copy_len += 3;
716 pos += 2;
717 continue;
718 }
719
720 if (copy_len > 0) {
721 /* Copy up to here */
722 memcpy(pos_norm, pos_url_s, copy_len);
723 pos_norm += copy_len;
724 copy_len = 0;
725 }
726
727 /* Put the unescaped character in the normalised URL */
728 *(pos_norm++) = (char)ascii_offset;
729 pos += 2;
730 pos_url_s = pos + 1;
731
732 length -= 2;
733
734 } else if ((section != URL_SCHEME && section != URL_HOST) &&
735 (nsurl__is_no_escape(*pos) == false)) {
736
737 /* This needs to be escaped */
738 if (copy_len > 0) {
739 /* Copy up to here */
740 memcpy(pos_norm, pos_url_s, copy_len);
741 pos_norm += copy_len;
742 copy_len = 0;
743 }
744
745 /* escape */
746 *(pos_norm++) = '%';
747 *(pos_norm++) = digit2uppercase_hex(
748 ((unsigned char)*pos) >> 4);
749 *(pos_norm++) = digit2uppercase_hex(
750 ((unsigned char)*pos) & 0xf);
751 pos_url_s = pos + 1;
752
753 length += 2;
754
755 } else if ((section == URL_SCHEME || section == URL_HOST) &&
756 ascii_is_alpha_upper(*pos)) {
757 /* Lower case this letter */
758
759 if (copy_len > 0) {
760 /* Copy up to here */
761 memcpy(pos_norm, pos_url_s, copy_len);
762 pos_norm += copy_len;
763 copy_len = 0;
764 }
765 /* Copy lower cased letter into normalised URL */
766 *(pos_norm++) = ascii_to_lower(*pos);
767 pos_url_s = pos + 1;
768
769 } else {
770 /* This character is safe in normalised URL */
771 copy_len++;
772 }
773 }
774
775 if (copy_len > 0) {
776 /* Copy up to here */
777 memcpy(pos_norm, pos_url_s, copy_len);
778 pos_norm += copy_len;
779 }
780
781 /* Mark end of section */
782 (*pos_norm) = '\0';
783
784 /* Stage 2: Create the URL components for the required section */
785 switch (section) {
786 case URL_SCHEME:
787 if (length == 0) {
788 /* No scheme, assuming http */
789 url->scheme = lwc_string_ref(corestring_lwc_http);
790 } else {
791 /* Add scheme to URL */
792 if (lwc_intern_string(norm_start, length,
793 &url->scheme) != lwc_error_ok) {
794 return NSERROR_NOMEM;
795 }
796 }
797
798 break;
799
800 case URL_CREDENTIALS:
801 url->username = NULL;
802 url->password = NULL;
803
804 /* file: URLs don't have credentials */
805 if (url->scheme_type == NSURL_SCHEME_FILE) {
806 break;
807 }
808
809 if (length != 0 && *norm_start != ':') {
810 char *sec_start = norm_start;
811 if (pegs->colon_first != pegs->authority &&
812 pegs->at > pegs->colon_first + 1) {
813 /* there's a password */
814 sec_start += pegs->colon_first -
815 pegs->authority + 1;
816 if (lwc_intern_string(sec_start,
817 pegs->at - pegs->colon_first -1,
818 &url->password) !=
819 lwc_error_ok) {
820 return NSERROR_NOMEM;
821 }
822
823 /* update start pos and length for username */
824 sec_start = norm_start;
825 length -= pegs->at - pegs->colon_first;
826 } else if (pegs->colon_first != pegs->authority &&
827 pegs->at == pegs->colon_first + 1) {
828 /* strip username colon */
829 length--;
830 }
831
832 /* Username */
833 if (lwc_intern_string(sec_start, length,
834 &url->username) != lwc_error_ok) {
835 return NSERROR_NOMEM;
836 }
837 }
838
839 break;
840
841 case URL_HOST:
842 url->host = NULL;
843 url->port = NULL;
844
845 /* file: URLs don't have a host */
846 if (url->scheme_type == NSURL_SCHEME_FILE) {
847 break;
848 }
849
850 if (length != 0) {
851 size_t colon = 0;
852 char *sec_start = norm_start;
853 if (pegs->at < pegs->colon_first &&
854 pegs->colon_last == pegs->authority) {
855 /* There's one colon and it's after @ marker */
856 colon = pegs->colon_first;
857 } else if (pegs->colon_last != pegs->authority) {
858 /* There's more than one colon */
859 colon = pegs->colon_last;
860 } else {
861 /* There's no colon that could be a port
862 * separator */
863 flags |= NSURL_F_NO_PORT;
864 }
865
866 if (!(flags & NSURL_F_NO_PORT)) {
867 /* Determine whether colon is a port separator
868 */
869 sec_start += colon - pegs->at;
870 while (++sec_start < norm_start + length) {
871 if (!ascii_is_digit(*sec_start)) {
872 /* Character after port isn't a
873 * digit; not a port separator
874 */
875 flags |= NSURL_F_NO_PORT;
876 break;
877 }
878 }
879 }
880
881 if (!(flags & NSURL_F_NO_PORT)) {
882 /* There's a port */
883 size_t skip = (pegs->at == pegs->authority) ?
884 1 : 0;
885 sec_start = norm_start + colon - pegs->at +
886 skip;
887 if (url->scheme != NULL &&
888 url->scheme_type ==
889 NSURL_SCHEME_HTTP &&
890 length -
891 (colon - pegs->at + skip) == 2 &&
892 *sec_start == '8' &&
893 *(sec_start + 1) == '0') {
894 /* Scheme is http, and port is default
895 * (80) */
896 flags |= NSURL_F_NO_PORT;
897 }
898
899 if (length <= (colon - pegs->at + skip)) {
900 /* No space for a port after the colon
901 */
902 flags |= NSURL_F_NO_PORT;
903 }
904
905 /* Add non-redundant ports to NetSurf URL */
906 sec_start = norm_start + colon - pegs->at +
907 skip;
908 if (!(flags & NSURL_F_NO_PORT) &&
909 lwc_intern_string(sec_start,
910 length -
911 (colon - pegs->at + skip),
912 &url->port) != lwc_error_ok) {
913 return NSERROR_NOMEM;
914 }
915
916 /* update length for host */
917 skip = (pegs->at == pegs->authority) ? 0 : 1;
918 length = colon - pegs->at - skip;
919 }
920
921 /* host */
922 /* Encode host according to IDNA2008 */
923 ret = idna_encode(norm_start, length, &host, &host_len);
924 if (ret == NSERROR_OK) {
925 /* valid idna encoding */
926 if (lwc_intern_string(host, host_len,
927 &url->host) != lwc_error_ok) {
928 return NSERROR_NOMEM;
929 }
930 free(host);
931 } else {
932 /* fall back to straight interning */
933 if (lwc_intern_string(norm_start, length,
934 &url->host) != lwc_error_ok) {
935 return NSERROR_NOMEM;
936 }
937 }
938 }
939
940 break;
941
942 case URL_PATH:
943 if (length != 0) {
944 if (lwc_intern_string(norm_start, length,
945 &url->path) != lwc_error_ok) {
946 return NSERROR_NOMEM;
947 }
948 } else if ((url->host != NULL &&
949 url->scheme_type != NSURL_SCHEME_MAILTO) ||
950 url->scheme_type == NSURL_SCHEME_FILE) {
951 /* Set empty path to "/" if:
952 * - there's a host and its not a mailto: URL
953 * - its a file: URL
954 */
955 if (lwc_intern_string("/", SLEN("/"),
956 &url->path) != lwc_error_ok) {
957 return NSERROR_NOMEM;
958 }
959 } else {
960 url->path = NULL;
961 }
962
963 break;
964
965 case URL_QUERY:
966 if (length != 0) {
967 if (lwc_intern_string(norm_start, length,
968 &url->query) != lwc_error_ok) {
969 return NSERROR_NOMEM;
970 }
971 } else {
972 url->query = NULL;
973 }
974
975 break;
976
977 case URL_FRAGMENT:
978 if (length != 0) {
979 if (lwc_intern_string(norm_start, length,
980 &url->fragment) != lwc_error_ok) {
981 return NSERROR_NOMEM;
982 }
983 } else {
984 url->fragment = NULL;
985 }
986
987 break;
988 }
989
990 return NSERROR_OK;
991 }
992
993
994 /**
995 * Get nsurl string info; total length, component lengths, & components present
996 *
997 * \param url NetSurf URL components
998 * \param parts Which parts of the URL are required in the string
999 * \param url_l Updated to total string length
1000 * \param lengths Updated with individual component lengths
1001 * \param pflags Updated to contain relevant string flags
1002 */
static void nsurl__get_string_data(const struct nsurl_components *url,
		nsurl_component parts, size_t *url_l,
		struct nsurl_component_lengths *lengths,
		enum nsurl_string_flags *pflags)
{
	/* Work on local copies; results are stored once everything has
	 * been worked out.  Flags already set by the caller are kept. */
	enum nsurl_string_flags flags = *pflags;
	size_t total = 0;

	/* A component is output only if it was both asked for and is
	 * present in the URL.  Lengths of components that are not output
	 * are left untouched. */
	if ((url->scheme != NULL) && ((parts & NSURL_SCHEME) != 0)) {
		lengths->scheme = lwc_string_length(url->scheme);
		total += lengths->scheme;
		flags |= NSURL_F_SCHEME;
	}

	if ((url->username != NULL) && ((parts & NSURL_USERNAME) != 0)) {
		lengths->username = lwc_string_length(url->username);
		total += lengths->username;
		flags |= NSURL_F_USERNAME;
	}

	if ((url->password != NULL) && ((parts & NSURL_PASSWORD) != 0)) {
		/* Password is always preceded by its colon */
		lengths->password = lwc_string_length(url->password);
		total += SLEN(":") + lengths->password;
		flags |= NSURL_F_PASSWORD;
	}

	if ((url->host != NULL) && ((parts & NSURL_HOST) != 0)) {
		lengths->host = lwc_string_length(url->host);
		total += lengths->host;
		flags |= NSURL_F_HOST;
	}

	if ((url->port != NULL) && ((parts & NSURL_PORT) != 0)) {
		/* Port is always preceded by its colon */
		lengths->port = lwc_string_length(url->port);
		total += SLEN(":") + lengths->port;
		flags |= NSURL_F_PORT;
	}

	if ((url->path != NULL) && ((parts & NSURL_PATH) != 0)) {
		lengths->path = lwc_string_length(url->path);
		total += lengths->path;
		flags |= NSURL_F_PATH;
	}

	if ((url->query != NULL) && ((parts & NSURL_QUERY) != 0)) {
		lengths->query = lwc_string_length(url->query);
		total += lengths->query;
		flags |= NSURL_F_QUERY;
	}

	if ((url->fragment != NULL) && ((parts & NSURL_FRAGMENT) != 0)) {
		lengths->fragment = lwc_string_length(url->fragment);
		total += lengths->fragment;
		flags |= NSURL_F_FRAGMENT;
	}

	/* Now add the punctuation that joins the selected components.
	 * These tests look at the flags built up so far, so their order
	 * matters. */

	/* Scheme colon: scheme is output and more than it was requested */
	if (((flags & NSURL_F_SCHEME) != 0) && (parts > NSURL_SCHEME)) {
		total += SLEN(":");
		flags |= NSURL_F_SCHEME_PUNCTUATION;
	}

	/* Authority slashes: scheme is output with something else, and the
	 * URL's path starts with a slash */
	if (((flags & NSURL_F_SCHEME) != 0) && (flags > NSURL_F_SCHEME) &&
			(url->path != NULL) &&
			(lwc_string_data(url->path)[0] == '/')) {
		total += SLEN("//");
		flags |= NSURL_F_AUTHORITY_PUNCTUATION;
	}

	/* Credentials at sign: some credentials are output before a host */
	if (((flags & (NSURL_F_USERNAME | NSURL_F_PASSWORD)) != 0) &&
			((flags & NSURL_F_HOST) != 0)) {
		total += SLEN("@");
		flags |= NSURL_F_CREDENTIALS_PUNCTUATION;
	}

	/* Query question mark: query is output and so is something other
	 * than the query and fragment */
	if (((flags & NSURL_F_QUERY) != 0) &&
			((flags & ~(NSURL_F_QUERY | NSURL_F_FRAGMENT)) != 0)) {
		total += SLEN("?");
		flags |= NSURL_F_QUERY_PUNCTUATION;
	}

	/* Fragment hash mark: fragment is output and so is something else */
	if (((flags & NSURL_F_FRAGMENT) != 0) &&
			((flags & ~NSURL_F_FRAGMENT) != 0)) {
		total += SLEN("#");
		flags |= NSURL_F_FRAGMENT_PUNCTUATION;
	}

	*url_l = total;
	*pflags = flags;
}
1107
1108
1109 /**
1110 * Copy url string into provided buffer
1111 *
1112 * \param url NetSurf URL components
1113 * \param url_s Updated to contain the string
1114 * \param l Individual component lengths
1115 * \param flags String flags
1116 */
nsurl__get_string(const struct nsurl_components * url,char * url_s,struct nsurl_component_lengths * l,enum nsurl_string_flags flags)1117 static void nsurl__get_string(const struct nsurl_components *url, char *url_s,
1118 struct nsurl_component_lengths *l,
1119 enum nsurl_string_flags flags)
1120 {
1121 char *pos;
1122
1123 /* Copy the required parts into the url string */
1124 pos = url_s;
1125
1126 if (flags & NSURL_F_SCHEME) {
1127 memcpy(pos, lwc_string_data(url->scheme), l->scheme);
1128 pos += l->scheme;
1129 }
1130
1131 if (flags & NSURL_F_SCHEME_PUNCTUATION) {
1132 *(pos++) = ':';
1133 }
1134
1135 if (flags & NSURL_F_AUTHORITY_PUNCTUATION) {
1136 *(pos++) = '/';
1137 *(pos++) = '/';
1138 }
1139
1140 if (flags & NSURL_F_USERNAME) {
1141 memcpy(pos, lwc_string_data(url->username), l->username);
1142 pos += l->username;
1143 }
1144
1145 if (flags & NSURL_F_PASSWORD) {
1146 *(pos++) = ':';
1147 memcpy(pos, lwc_string_data(url->password), l->password);
1148 pos += l->password;
1149 }
1150
1151 if (flags & NSURL_F_CREDENTIALS_PUNCTUATION) {
1152 *(pos++) = '@';
1153 }
1154
1155 if (flags & NSURL_F_HOST) {
1156 memcpy(pos, lwc_string_data(url->host), l->host);
1157 pos += l->host;
1158 }
1159
1160 if (flags & NSURL_F_PORT) {
1161 *(pos++) = ':';
1162 memcpy(pos, lwc_string_data(url->port), l->port);
1163 pos += l->port;
1164 }
1165
1166 if (flags & NSURL_F_PATH) {
1167 memcpy(pos, lwc_string_data(url->path), l->path);
1168 pos += l->path;
1169 }
1170
1171 if (flags & NSURL_F_QUERY) {
1172 if (flags & NSURL_F_QUERY_PUNCTUATION)
1173 *(pos++) = '?';
1174 memcpy(pos, lwc_string_data(url->query), l->query);
1175 pos += l->query;
1176 }
1177
1178 if (flags & NSURL_F_FRAGMENT) {
1179 if (flags & NSURL_F_FRAGMENT_PUNCTUATION)
1180 *(pos++) = '#';
1181 memcpy(pos, lwc_string_data(url->fragment), l->fragment);
1182 pos += l->fragment;
1183 }
1184
1185 *pos = '\0';
1186 }
1187
1188
1189 /* exported interface, documented in nsurl.h */
nsurl__components_to_string(const struct nsurl_components * components,nsurl_component parts,size_t pre_padding,char ** url_s_out,size_t * url_l_out)1190 nserror nsurl__components_to_string(
1191 const struct nsurl_components *components,
1192 nsurl_component parts, size_t pre_padding,
1193 char **url_s_out, size_t *url_l_out)
1194 {
1195 struct nsurl_component_lengths str_len = { 0, 0, 0, 0, 0, 0, 0, 0 };
1196 enum nsurl_string_flags str_flags = 0;
1197 size_t url_l;
1198 char *url_s;
1199
1200 assert(components != NULL);
1201
1202 /* Get the string length and find which parts of url need copied */
1203 nsurl__get_string_data(components, parts, &url_l,
1204 &str_len, &str_flags);
1205
1206 if (url_l == 0) {
1207 return NSERROR_BAD_URL;
1208 }
1209
1210 /* Allocate memory for url string */
1211 url_s = malloc(pre_padding + url_l + 1); /* adding 1 for '\0' */
1212 if (url_s == NULL) {
1213 return NSERROR_NOMEM;
1214 }
1215
1216 /* Copy the required parts into the url string */
1217 nsurl__get_string(components, url_s + pre_padding, &str_len, str_flags);
1218
1219 *url_s_out = url_s;
1220 *url_l_out = url_l;
1221
1222 return NSERROR_OK;
1223 }
1224
1225
1226 /**
1227 * Calculate hash value
1228 *
1229 * \param url NetSurf URL object to set hash value for
1230 */
nsurl__calc_hash(nsurl * url)1231 void nsurl__calc_hash(nsurl *url)
1232 {
1233 uint32_t hash = 0;
1234
1235 if (url->components.scheme)
1236 hash ^= lwc_string_hash_value(url->components.scheme);
1237
1238 if (url->components.username)
1239 hash ^= lwc_string_hash_value(url->components.username);
1240
1241 if (url->components.password)
1242 hash ^= lwc_string_hash_value(url->components.password);
1243
1244 if (url->components.host)
1245 hash ^= lwc_string_hash_value(url->components.host);
1246
1247 if (url->components.port)
1248 hash ^= lwc_string_hash_value(url->components.port);
1249
1250 if (url->components.path)
1251 hash ^= lwc_string_hash_value(url->components.path);
1252
1253 if (url->components.query)
1254 hash ^= lwc_string_hash_value(url->components.query);
1255
1256 url->hash = hash;
1257 }
1258
1259
1260 /******************************************************************************
1261 * NetSurf URL Public API *
1262 ******************************************************************************/
1263
1264 /* exported interface, documented in nsurl.h */
nsurl_create(const char * const url_s,nsurl ** url)1265 nserror nsurl_create(const char * const url_s, nsurl **url)
1266 {
1267 struct url_markers m;
1268 struct nsurl_components c;
1269 size_t length;
1270 char *buff;
1271 nserror e = NSERROR_OK;
1272 bool match;
1273
1274 assert(url_s != NULL);
1275
1276 /* Peg out the URL sections */
1277 nsurl__get_string_markers(url_s, &m, false);
1278
1279 /* Get the length of the longest section */
1280 length = nsurl__get_longest_section(&m);
1281
1282 /* Allocate enough memory to url escape the longest section */
1283 buff = malloc(length * 3 + 1);
1284 if (buff == NULL)
1285 return NSERROR_NOMEM;
1286
1287 /* Set scheme type */
1288 c.scheme_type = m.scheme_type;
1289
1290 /* Build NetSurf URL object from sections */
1291 e |= nsurl__create_from_section(url_s, URL_SCHEME, &m, buff, &c);
1292 e |= nsurl__create_from_section(url_s, URL_CREDENTIALS, &m, buff, &c);
1293 e |= nsurl__create_from_section(url_s, URL_HOST, &m, buff, &c);
1294 e |= nsurl__create_from_section(url_s, URL_PATH, &m, buff, &c);
1295 e |= nsurl__create_from_section(url_s, URL_QUERY, &m, buff, &c);
1296 e |= nsurl__create_from_section(url_s, URL_FRAGMENT, &m, buff, &c);
1297
1298 /* Finished with buffer */
1299 free(buff);
1300
1301 if (e != NSERROR_OK) {
1302 nsurl__components_destroy(&c);
1303 return NSERROR_NOMEM;
1304 }
1305
1306 /* Validate URL */
1307 if ((lwc_string_isequal(c.scheme, corestring_lwc_http,
1308 &match) == lwc_error_ok && match == true) ||
1309 (lwc_string_isequal(c.scheme, corestring_lwc_https,
1310 &match) == lwc_error_ok && match == true)) {
1311 /* http, https must have host */
1312 if (c.host == NULL) {
1313 nsurl__components_destroy(&c);
1314 return NSERROR_BAD_URL;
1315 }
1316 }
1317
1318 e = nsurl__components_to_string(&c, NSURL_WITH_FRAGMENT,
1319 offsetof(nsurl, string), (char **)url, &length);
1320 if (e != NSERROR_OK) {
1321 return e;
1322 }
1323
1324 (*url)->components = c;
1325 (*url)->length = length;
1326
1327 /* Get the nsurl's hash */
1328 nsurl__calc_hash(*url);
1329
1330 /* Give the URL a reference */
1331 (*url)->count = 1;
1332
1333 return NSERROR_OK;
1334 }
1335
1336
1337 /* exported interface, documented in nsurl.h */
nsurl_join(const nsurl * base,const char * rel,nsurl ** joined)1338 nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
1339 {
1340 struct url_markers m;
1341 struct nsurl_components c;
1342 size_t length;
1343 char *buff;
1344 char *buff_pos;
1345 char *buff_start;
1346 nserror error = 0;
1347 enum {
1348 NSURL_F_REL = 0,
1349 NSURL_F_BASE_SCHEME = (1 << 0),
1350 NSURL_F_BASE_AUTHORITY = (1 << 1),
1351 NSURL_F_BASE_PATH = (1 << 2),
1352 NSURL_F_MERGED_PATH = (1 << 3),
1353 NSURL_F_BASE_QUERY = (1 << 4)
1354 } joined_parts;
1355
1356 assert(base != NULL);
1357 assert(rel != NULL);
1358
1359 NSLOG(netsurf, DEEPDEBUG, "base: \"%s\", rel: \"%s\"",
1360 nsurl_access(base), rel);
1361
1362 /* Peg out the URL sections */
1363 nsurl__get_string_markers(rel, &m, true);
1364
1365 /* Get the length of the longest section */
1366 length = nsurl__get_longest_section(&m);
1367
1368 /* Initially assume that the joined URL can be formed entierly from
1369 * the relative URL.
1370 */
1371 joined_parts = NSURL_F_REL;
1372
1373 /* Update joined_compnents to indicate any required parts from the
1374 * base URL.
1375 */
1376 if (m.scheme_end - m.start <= 0) {
1377 /* The relative url has no scheme.
1378 * Use base URL's scheme. */
1379 joined_parts |= NSURL_F_BASE_SCHEME;
1380
1381 if (m.path - m.authority <= 0) {
1382 /* The relative URL has no authority.
1383 * Use base URL's authority. */
1384 joined_parts |= NSURL_F_BASE_AUTHORITY;
1385
1386 if (m.query - m.path <= 0) {
1387 /* The relative URL has no path.
1388 * Use base URL's path. */
1389 joined_parts |= NSURL_F_BASE_PATH;
1390
1391 if (m.fragment - m.query <= 0) {
1392 /* The relative URL has no query.
1393 * Use base URL's query. */
1394 joined_parts |= NSURL_F_BASE_QUERY;
1395 }
1396
1397 } else if (*(rel + m.path) != '/') {
1398 /* Relative URL has relative path */
1399 joined_parts |= NSURL_F_MERGED_PATH;
1400 }
1401 }
1402 }
1403
1404 /* Allocate enough memory to url escape the longest section, plus
1405 * space for path merging (if required).
1406 */
1407 if (joined_parts & NSURL_F_MERGED_PATH) {
1408 /* Need to merge paths */
1409 length += (base->components.path != NULL) ?
1410 lwc_string_length(base->components.path) : 0;
1411 }
1412 length *= 4;
1413 /* Plus space for removing dots from path */
1414 length += (m.query - m.path) + ((base->components.path != NULL) ?
1415 lwc_string_length(base->components.path) : 0);
1416
1417 buff = malloc(length + 5);
1418 if (buff == NULL) {
1419 return NSERROR_NOMEM;
1420 }
1421
1422 buff_pos = buff;
1423
1424 /* Form joined URL from base or rel components, as appropriate */
1425
1426 if (joined_parts & NSURL_F_BASE_SCHEME) {
1427 c.scheme_type = base->components.scheme_type;
1428
1429 c.scheme = nsurl__component_copy(base->components.scheme);
1430 } else {
1431 c.scheme_type = m.scheme_type;
1432
1433 error = nsurl__create_from_section(rel, URL_SCHEME, &m, buff, &c);
1434 if (error != NSERROR_OK) {
1435 free(buff);
1436 return error;
1437 }
1438 }
1439
1440 if (joined_parts & NSURL_F_BASE_AUTHORITY) {
1441 c.username = nsurl__component_copy(base->components.username);
1442 c.password = nsurl__component_copy(base->components.password);
1443 c.host = nsurl__component_copy(base->components.host);
1444 c.port = nsurl__component_copy(base->components.port);
1445 } else {
1446 error = nsurl__create_from_section(rel, URL_CREDENTIALS, &m,
1447 buff, &c);
1448 if (error == NSERROR_OK) {
1449 error = nsurl__create_from_section(rel, URL_HOST, &m,
1450 buff, &c);
1451 }
1452 if (error != NSERROR_OK) {
1453 free(buff);
1454 return error;
1455 }
1456 }
1457
1458 if (joined_parts & NSURL_F_BASE_PATH) {
1459 c.path = nsurl__component_copy(base->components.path);
1460
1461 } else if (joined_parts & NSURL_F_MERGED_PATH) {
1462 struct url_markers m_path;
1463 size_t new_length;
1464
1465 /* RFC3986 said to append relative path to "/" if the
1466 * base path had no path and an authority.
1467 *
1468 * However, that specification is redundant, and base paths
1469 * are normalised, so file, http, and https URLs will always
1470 * have a non-empty path. (Empty paths become "/".)
1471 */
1472
1473 {
1474 /* Append relative path to all but last segment of
1475 * base path. */
1476 size_t path_end = lwc_string_length(
1477 base->components.path);
1478 const char *path = lwc_string_data(
1479 base->components.path);
1480
1481 while (*(path + path_end) != '/' &&
1482 path_end != 0) {
1483 path_end--;
1484 }
1485 if (*(path + path_end) == '/')
1486 path_end++;
1487
1488 /* Copy the base part */
1489 memcpy(buff_pos, path, path_end);
1490 buff_pos += path_end;
1491
1492 /* Copy the relative part */
1493 memcpy(buff_pos, rel + m.path, m.query - m.path);
1494 buff_pos += m.query - m.path;
1495 }
1496
1497 /* add termination to string */
1498 *buff_pos++ = '\0';
1499
1500 new_length = nsurl__remove_dot_segments(buff, buff_pos);
1501
1502 m_path.path = 0;
1503 m_path.query = new_length;
1504
1505 buff_start = buff_pos + new_length;
1506 error = nsurl__create_from_section(buff_pos, URL_PATH, &m_path,
1507 buff_start, &c);
1508 if (error != NSERROR_OK) {
1509 free(buff);
1510 return error;
1511 }
1512
1513 } else {
1514 struct url_markers m_path;
1515 size_t new_length;
1516
1517 memcpy(buff_pos, rel + m.path, m.query - m.path);
1518 buff_pos += m.query - m.path;
1519 *(buff_pos++) = '\0';
1520
1521 new_length = nsurl__remove_dot_segments(buff, buff_pos);
1522
1523 m_path.path = 0;
1524 m_path.query = new_length;
1525
1526 buff_start = buff_pos + new_length;
1527
1528 error = nsurl__create_from_section(buff_pos, URL_PATH, &m_path,
1529 buff_start, &c);
1530 if (error != NSERROR_OK) {
1531 free(buff);
1532 return error;
1533 }
1534 }
1535
1536 if (joined_parts & NSURL_F_BASE_QUERY) {
1537 c.query = nsurl__component_copy(base->components.query);
1538 } else {
1539 error = nsurl__create_from_section(rel, URL_QUERY, &m,
1540 buff, &c);
1541 if (error != NSERROR_OK) {
1542 free(buff);
1543 return error;
1544 }
1545 }
1546
1547 error = nsurl__create_from_section(rel, URL_FRAGMENT, &m, buff, &c);
1548
1549 /* Free temporary buffer */
1550 free(buff);
1551
1552 if (error != NSERROR_OK) {
1553 return error;
1554 }
1555
1556 error = nsurl__components_to_string(&c, NSURL_WITH_FRAGMENT,
1557 offsetof(nsurl, string), (char **)joined, &length);
1558 if (error != NSERROR_OK) {
1559 return error;
1560 }
1561
1562 (*joined)->components = c;
1563 (*joined)->length = length;
1564
1565 /* Get the nsurl's hash */
1566 nsurl__calc_hash(*joined);
1567
1568 /* Give the URL a reference */
1569 (*joined)->count = 1;
1570
1571 return NSERROR_OK;
1572 }
1573