1 /*
2 GSK - a library to write servers
3 Copyright (C) 1999-2000 Dave Benson
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19 Contact:
20 daveb@ffem.org <Dave Benson>
21 */
22
23
24 #include <ctype.h>
25 #include <string.h>
26 #include <stdlib.h>
27 #include "gskurl.h"
28 #include "../gskmacros.h"
29
/* Parent class of GskUrl; assigned in gsk_url_class_init() and used by
 * gsk_url_finalize() to chain up. */
static GObjectClass *parent_class = NULL;
31
/* True if c may follow the first letter of a URL scheme name
 * (RFC 2396, Section 3.1): a letter, a digit, '+', '-' or '.'.
 *
 * The value handed to isalnum() is converted to unsigned char first:
 * passing a negative plain char to a <ctype.h> function is undefined
 * behavior.  Note that c is evaluated more than once. */
#define IS_SCHEME_CHAR(c) \
  (isalnum ((unsigned char) (c)) || (c) == '+' || (c) == '-' || (c) == '.')
34
/* How the scheme-specific part of a URL is to be parsed.  The value is
 * chosen in gsk_url_new_from_scheme_specific() from the scheme and the
 * number of leading slashes. */
typedef enum
{
  GSK_URL_INTERPRETATION_RELATIVE,      /* relative url */
  GSK_URL_INTERPRETATION_ABSOLUTE,      /* same host, absolute url */
  GSK_URL_INTERPRETATION_REMOTE,        /* url on remote host */
  GSK_URL_INTERPRETATION_UNKNOWN        /* no interpretation could be chosen */
} GskUrlInterpretation;
42
43 static const char *
gsk_url_scheme_name(GskUrlScheme scheme)44 gsk_url_scheme_name (GskUrlScheme scheme)
45 {
46 switch (scheme)
47 {
48 case GSK_URL_SCHEME_FILE: return "file";
49 case GSK_URL_SCHEME_HTTP: return "http";
50 case GSK_URL_SCHEME_HTTPS: return "https";
51 case GSK_URL_SCHEME_FTP: return "ftp";
52 case GSK_URL_SCHEME_OTHER: return "?other?";
53 default: return NULL;
54 }
55 }
56
57 /* general sanity check */
58 gboolean
gsk_url_is_valid_hostname(const char * hostname,char * bad_char_out)59 gsk_url_is_valid_hostname (const char *hostname, char *bad_char_out)
60 {
61 while (*hostname)
62 {
63 if (!isalnum (*hostname)
64 && *hostname != '-'
65 && *hostname != '-'
66 && *hostname != '.')
67 {
68 *bad_char_out = *hostname;
69 return FALSE;
70 }
71 hostname++;
72 }
73 return TRUE;
74 }
75
76 gboolean
gsk_url_is_valid_generic_component(const char * str,char * bad_char_out)77 gsk_url_is_valid_generic_component (const char *str, char *bad_char_out)
78 {
79 while (33 <= *str && *str <= 126)
80 str++;
81 if (*str == 0)
82 return TRUE;
83 *bad_char_out = *str;
84 return FALSE;
85 }
86
87 static inline gboolean
url_check_is_valid(GskUrl * url,GError ** error)88 url_check_is_valid (GskUrl *url, GError **error)
89 {
90 char bad_char;
91 if (url->host && !gsk_url_is_valid_hostname (url->host, &bad_char))
92 {
93 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
94 "URL %s constructed uses unallowed character '%c' (0x%02x)",
95 "host", bad_char, bad_char);
96 return FALSE;
97 }
98 if (url->path && !gsk_url_is_valid_path (url->path, &bad_char))
99 {
100 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
101 "URL %s constructed uses unallowed character '%c' (0x%02x)",
102 "path", bad_char, bad_char);
103 return FALSE;
104 }
105 if (url->query && !gsk_url_is_valid_query (url->query, &bad_char))
106 {
107 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
108 "URL %s constructed uses unallowed character '%c' (0x%02x)",
109 "query", bad_char, bad_char);
110 return FALSE;
111 }
112 if (url->fragment && !gsk_url_is_valid_fragment (url->fragment, &bad_char))
113 {
114 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
115 "URL %s constructed uses unallowed character '%c' (0x%02x)",
116 "fragment", bad_char, bad_char);
117 return FALSE;
118 }
119 return TRUE;
120 }
121
122 static GskUrl *
gsk_url_new_from_scheme_specific(GskUrlScheme scheme,const char * spec,GError ** error)123 gsk_url_new_from_scheme_specific (GskUrlScheme scheme,
124 const char *spec,
125 GError **error)
126 {
127 int num_slashes;
128 const char *start = spec;
129 GskUrlInterpretation interpretation = GSK_URL_INTERPRETATION_UNKNOWN;
130 GskUrl *url;
131
132 char *host, *user_name, *password, *path, *query, *fragment;
133 int port;
134
135 num_slashes = 0;
136 while (*spec == '/')
137 {
138 num_slashes++;
139 spec++;
140 }
141 if (scheme == GSK_URL_SCHEME_FILE)
142 interpretation = GSK_URL_INTERPRETATION_ABSOLUTE;
143 else
144 switch (num_slashes)
145 {
146 case 0:
147 interpretation = GSK_URL_INTERPRETATION_RELATIVE;
148 break;
149 case 1:
150 interpretation = GSK_URL_INTERPRETATION_ABSOLUTE;
151 break;
152 case 2:
153 /* ``schemes including a top hierarchical element for a naming
154 * authority'' (Section 3.2)
155 */
156 interpretation = GSK_URL_INTERPRETATION_REMOTE;
157 break;
158 case 3:
159 /* File urls (well those are now handled above so this
160 * is pretty dubious)
161 */
162 interpretation = GSK_URL_INTERPRETATION_ABSOLUTE;
163 break;
164 default:
165 /* syntax error? */
166 break;
167 }
168
169
170 host = NULL;
171 port = 0;
172 user_name = NULL;
173 path = NULL;
174 query = NULL;
175 fragment = NULL;
176 password = NULL;
177
178 switch (interpretation)
179 {
180 case GSK_URL_INTERPRETATION_REMOTE:
181 /* rfc 2396, section 3.2.2. */
182 {
183 const char *end_hostport;
184 const char *host_start;
185 const char *host_end;
186 const char *at_sign;
187 const char *colon;
188 /* basically the syntax is:
189 * USER@HOST:PORT/
190 * ^ | ^
191 * at_sign ^ end_hostport
192 * colon
193 */
194 end_hostport = strchr (spec, '/');
195 if (end_hostport == NULL)
196 #if 1
197 end_hostport = strchr (spec, 0);
198 #else /* too strict for casual use ;) */
199 {
200 /* TODO: it's kinda hard to pinpoint where this
201 is specified. See Section 3 in RFC 2396. */
202 g_set_error (error, GSK_G_ERROR_DOMAIN,
203 GSK_ERROR_INVALID_ARGUMENT,
204 _("missing / after host in URL"));
205 return NULL;
206 }
207 #endif
208 at_sign = memchr (spec, '@', end_hostport - spec);
209 host_start = at_sign != NULL ? (at_sign + 1) : spec;
210 colon = memchr (host_start, ':', end_hostport - host_start);
211 if (at_sign != NULL)
212 {
213 const char *password_sep = memchr (spec, ':', at_sign - spec);
214 if (password_sep)
215 {
216 user_name = g_strndup (spec, password_sep - spec);
217 password = g_strndup (password_sep + 1,
218 at_sign - (password_sep + 1));
219 }
220 else
221 {
222 user_name = g_strndup (spec, at_sign - spec);
223 }
224 /* XXX: should validate username against
225 * GSK_URL_USERNAME_CHARSET
226 */
227 }
228 host_end = colon != NULL ? colon : end_hostport;
229 host = g_strndup (host_start, host_end - host_start);
230
231 if (colon != NULL)
232 port = atoi (colon + 1);
233
234 spec = end_hostport;
235 if (*spec == 0)
236 {
237 GskUrl *url;
238 url = gsk_url_new_from_parts (scheme, host, port,
239 NULL, NULL, "/", NULL, NULL);
240 g_free (host);
241 return url;
242 }
243 }
244
245 /* fall through to parse the host-specific part of the url */
246 case GSK_URL_INTERPRETATION_RELATIVE:
247 case GSK_URL_INTERPRETATION_ABSOLUTE:
248 {
249 const char *query_start;
250 const char *frag_start;
251 if (num_slashes > 0
252 && interpretation == GSK_URL_INTERPRETATION_ABSOLUTE)
253 spec--;
254 query_start = strchr (spec, '?');
255 frag_start = strchr (query_start != NULL ? query_start : spec, '#');
256 if (query_start != NULL)
257 path = g_strndup (spec, query_start - spec);
258 else if (frag_start != NULL)
259 path = g_strndup (spec, frag_start - spec);
260 else
261 path = g_strdup (spec);
262 if (query_start != NULL)
263 {
264 if (frag_start != NULL)
265 query = g_strndup ((query_start+1), frag_start - (query_start+1));
266 else
267 query = g_strdup (query_start + 1);
268 }
269 if (frag_start != NULL)
270 fragment = g_strdup (frag_start + 1);
271 break;
272 }
273 case GSK_URL_INTERPRETATION_UNKNOWN:
274 {
275 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_BAD_FORMAT,
276 _("cannot guess how to interpret %s:%s"),
277 gsk_url_scheme_name (scheme), start);
278 goto error;
279 }
280 }
281
282 if (interpretation == GSK_URL_INTERPRETATION_REMOTE
283 && (host == NULL || host[0] == '\0' || !isalnum (host[0])))
284 {
285 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_BAD_FORMAT,
286 _("malformed host: should begin with a letter or number (%s)"),
287 host);
288 goto error;
289 }
290
291
292
293 url = g_object_new (GSK_TYPE_URL, NULL);
294 url->scheme = scheme;
295 if (scheme == GSK_URL_SCHEME_OTHER)
296 url->scheme_name = NULL;
297 else
298 url->scheme_name = (char *) gsk_url_scheme_name (scheme);
299 url->host = host;
300 url->user_name = user_name;
301 url->password = password;
302 url->query = query;
303 url->fragment = fragment;
304 url->port = port;
305 url->path = path;
306
307 if (!url_check_is_valid (url, error))
308 {
309 g_object_unref (url);
310 return NULL;
311 }
312 return url;
313
314 error:
315 g_free (host);
316 g_free (user_name);
317 g_free (password);
318 g_free (query);
319 g_free (fragment);
320 g_free (path);
321 return NULL;
322 }
323
324 /**
325 * gsk_url_get_relative_path:
326 * @url: the URL to get the host-relative path from.
327 *
328 * Obtain the path portion of a URL without
329 * the initial slash (/) character.
330 *
331 * The query component and fragment are also returned.
332 *
333 * returns: the URL as a string. This must be freed by the caller.
334 */
335 char *
gsk_url_get_relative_path(GskUrl * url)336 gsk_url_get_relative_path (GskUrl *url)
337 {
338 GString *string = g_string_new ("");
339 g_string_append (string, url->path);
340 if (url->query != NULL)
341 {
342 g_string_append_c (string, '?');
343 g_string_append (string, url->query);
344 }
345 if (url->fragment != NULL)
346 {
347 g_string_append_c (string, '#');
348 g_string_append (string, url->fragment);
349 }
350 return g_string_free (string, FALSE);
351 }
352
353 /**
354 * gsk_url_new_from_parts:
355 * @scheme: the type of URL being created.
356 * @host: the name (or numeric address as ASCII digits and dots) of the host.
357 * This is called the Authority by RFC 2396, Section 3.2.
358 * @port: the port number to use for the service, or 0 to use the default port
359 * for this type of URL scheme. For FTP, this is the control port and the data port
360 * will default to the next integer.
361 * @user_name: optional username identifier from the client.
362 * @password: optional password to authenticate.
363 * @path: the host-relative path for the URL
364 * @query: optional query string for URL.
365 * @fragment: optional information about a sublocation in the resource.
366 *
367 * Allocate a new URL from a bunch of pieces.
368 *
369 * returns: a reference to a new URL object.
370 */
371 GskUrl *
gsk_url_new_from_parts(GskUrlScheme scheme,const char * host,int port,const char * user_name,const char * password,const char * path,const char * query,const char * fragment)372 gsk_url_new_from_parts (GskUrlScheme scheme,
373 const char *host,
374 int port,
375 const char *user_name,
376 const char *password,
377 const char *path,
378 const char *query,
379 const char *fragment)
380 {
381 GskUrl *url = g_object_new (GSK_TYPE_URL, NULL);
382 url->scheme = scheme;
383 url->scheme_name = (char *) gsk_url_scheme_name (scheme);
384 url->host = g_strdup (host);
385 url->port = port;
386 url->user_name = g_strdup (user_name);
387 url->password = g_strdup (password);
388 url->path = g_strdup (path);
389 url->query = g_strdup (query);
390 url->fragment = g_strdup (fragment);
391 return url;
392 }
393
394 static void
gsk_url_finalize(GObject * object)395 gsk_url_finalize(GObject *object)
396 {
397 GskUrl *url = GSK_URL (object);
398 if (url->scheme == GSK_URL_SCHEME_OTHER)
399 g_free (url->scheme_name);
400 g_free (url->host);
401 g_free (url->user_name);
402 g_free (url->path);
403 g_free (url->query);
404 g_free (url->fragment);
405 (*parent_class->finalize) (object);
406 }
407
/* One row of the scheme lookup table in lookup_scheme_from_name():
 * a scheme name and the enum value it maps to.
 *
 * `name' must stay the first member: the table is searched with
 * pstrcmp(), which treats a pointer to an entry as a pointer to a
 * `char *'. */
typedef struct _UrlSchemeTableEntry UrlSchemeTableEntry;
struct _UrlSchemeTableEntry
{
  char *name;
  GskUrlScheme scheme;
};
414
/* Advance *ptr past a leading URL scheme name, if there is one.
 * A scheme starts with a letter and continues with scheme characters
 * (see IS_SCHEME_CHAR).  If the first character is not a letter,
 * *ptr is left unchanged.  The ':' that follows a scheme is not
 * consumed. */
static void
skip_scheme (const char **ptr)
{
  const char *at = *ptr;

  /* RFC 2396, Section 3.1 */
  /* Bytes are converted to unsigned char before classification:
   * negative plain chars are undefined for <ctype.h> functions. */
  if (!isalpha ((unsigned char) *at))
    return;
  at++;
  while (*at != '\0' && IS_SCHEME_CHAR ((unsigned char) *at))
    at++;
  *ptr = at;
}
426
/* Comparator in the form bsearch() expects: a and b each point at a
 * `char *', and the two strings are ordered with strcmp(). */
static int pstrcmp (const void *a, const void *b)
{
  const char *const *lhs = a;
  const char *const *rhs = b;

  return strcmp (*lhs, *rhs);
}
431
lookup_scheme_from_name(const char * scheme_start,const char * scheme_end,GskUrlScheme * scheme_out)432 static gboolean lookup_scheme_from_name (const char *scheme_start,
433 const char *scheme_end,
434 GskUrlScheme *scheme_out)
435 {
436 static UrlSchemeTableEntry table[] = {
437 /* MUST BE SORTED */
438 { "file", GSK_URL_SCHEME_FILE },
439 { "ftp", GSK_URL_SCHEME_FTP },
440 { "http", GSK_URL_SCHEME_HTTP },
441 { "https", GSK_URL_SCHEME_HTTPS },
442 };
443 int i;
444 UrlSchemeTableEntry tmp;
445 UrlSchemeTableEntry *entry;
446 #define NUM_SCHEMES G_N_ELEMENTS (table)
447 tmp.name = alloca (scheme_end - scheme_start + 1);
448 for (i = 0; i < scheme_end - scheme_start; i++)
449 tmp.name[i] = tolower (scheme_start[i]);
450 tmp.name[i] = '\0';
451 entry = bsearch (&tmp, table, G_N_ELEMENTS (table),
452 sizeof (UrlSchemeTableEntry), pstrcmp);
453 if (entry == NULL)
454 return FALSE;
455 *scheme_out = entry->scheme;
456 return TRUE;
457 }
458
459 /**
460 * gsk_url_new:
461 * @spec: standard string representation of the URL.
462 * @error: place to store a #GError if an error occurs.
463 *
464 * Parse a URL object from a string.
465 *
466 * returns: a reference to a new URL object, or NULL if an error occurred.
467 */
gsk_url_new(const char * spec,GError ** error)468 GskUrl *gsk_url_new (const char *spec,
469 GError **error)
470 {
471 const char *scheme_start;
472 const char *scheme_end;
473 GskUrlScheme scheme;
474
475 scheme_start = spec;
476 skip_scheme (&spec);
477 scheme_end = spec;
478
479 if (*spec != ':')
480 {
481 /* Url scheme did not end in ':' */
482 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_BAD_FORMAT,
483 "URL did not begin scheme:");
484 return NULL;
485 }
486 scheme_end = spec;
487 /* skip the colon */
488 spec++;
489 if (!lookup_scheme_from_name (scheme_start, scheme_end, &scheme))
490 {
491 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_BAD_FORMAT,
492 "URL did not begin with known scheme");
493 return NULL;
494 }
495
496 return gsk_url_new_from_scheme_specific (scheme, spec, error);
497 }
498
499 /**
500 * gsk_url_new_in_context:
501 * @spec: rough URL specification. This may be a complete URL,
502 * or it may have an implied scheme.
503 * @context: default scheme for URL's in your context.
504 * @error: place to store a #GError if something goes wrong.
505 *
506 * For places where you expect a certain type of URL,
507 * soemtimes people get lazy and drop the scheme.
508 * We support this here, by allowing a "backup scheme"
509 * to be specified.
510 *
511 * To be fully paranoid in such a situation, you may wish to
512 * if there appears to be a scheme, use gsk_url_new();
513 * otherwise call gsk_url_new_from_scheme_specific() directly.
514 * Alternately, it may be easier just to call
515 * gsk_url_new_in_context() directly all the time.
516 *
517 * See also gsk_url_new_relative().
518 *
519 * returns: a newly allocated URL object.
520 */
gsk_url_new_in_context(const char * spec,GskUrlScheme context,GError ** error)521 GskUrl *gsk_url_new_in_context(const char *spec,
522 GskUrlScheme context,
523 GError **error)
524 {
525 const char *scheme_start;
526 const char *scheme_end;
527 GskUrlScheme scheme;
528 scheme_start = spec;
529 skip_scheme (&spec);
530 scheme_end = spec;
531 if (scheme_start == scheme_end)
532 scheme = context;
533 else
534 {
535 if (!lookup_scheme_from_name (scheme_start, scheme_end, &scheme))
536 {
537 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
538 _("unknown url scheme (start of '%s')"), scheme_start);
539 return NULL;
540 }
541 /* skip the colon */
542 spec++;
543 }
544
545 return gsk_url_new_from_scheme_specific (scheme, spec, error);
546 }
547
548 /**
549 * gsk_url_new_relative:
550 * @base_url: context of the @spec found. This tells where @spec
551 * may be relative to.
552 * @location: the possibly relative spec.
553 * @error: place to store a #GError if something goes wrong.
554 *
555 * Allocate a new URL, which will be taken
556 * to be relative to @base_url if the @location
557 * is not obviously an absolute URL.
558 *
559 * Note that there is some ambiguity in how relative urls are
560 * interpreted. Note especially that
561 * /foo + /bar = /bar.
562 * /foo + bar = /bar.
563 * /foo/ + bar = /foo/bar.
564 * That is, a symbol with a trailing slash is a directory,
565 * otherwise the last piece of the url is assumed to be a file.
566 *
567 * returns: a newly allocated URL object.
568 */
569 GskUrl *
gsk_url_new_relative(GskUrl * base_url,const char * location,GError ** error)570 gsk_url_new_relative (GskUrl *base_url,
571 const char *location,
572 GError **error)
573 {
574 /* XXX: what is the right way to determine if a string is
575 * a absolute v. relative url???
576 * XXX: definitely NOT this, which doesn't have
577 * http:foo/bar.html
578 */
579 /* TODO: See RFC 2396 section 5, "Relative URI References"? */
580
581 /* if we have a ':' before a '/' character,
582 then assume a full url. */
583 const char *tmp;
584
585 GSK_SKIP_WHITESPACE (location);
586
587 if (*location == 0)
588 {
589 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
590 "gsk_url_new_relative: location was empty");
591 return NULL;
592 }
593
594 tmp = location;
595 while (*tmp && *tmp != '/' && *tmp != ':')
596 tmp++;
597 if (*tmp == ':')
598 {
599 /* absolute redirect */
600 return gsk_url_new (location, error);
601 }
602 else
603 {
604 const char *query_start = strchr (location, '?');
605 const char *frag_start = strchr (query_start ? query_start : location, '#');
606 const char *location_end = query_start ? query_start
607 : frag_start ? frag_start
608 : strchr (location, 0);
609 char *query = NULL;
610 char *fragment = NULL;
611 char *path;
612 guint path_len;
613 char bad_char;
614 GskUrl *rv;
615 if (query_start)
616 {
617 query_start++;
618 query = g_alloca (strlen (query_start));
619 if (frag_start)
620 {
621 memcpy (query, query_start + 1, frag_start - query_start);
622 query[frag_start - query_start] = 0;
623 }
624 else
625 {
626 strcpy (query, query_start);
627 }
628 }
629 if (frag_start)
630 fragment = strcpy (g_alloca (strlen (frag_start)), frag_start + 1);
631
632 path_len = location_end - location;
633 if (*location == '/')
634 {
635 path = memcpy (g_alloca (path_len + 1), location, path_len);
636 path[path_len] = 0;
637 }
638 else
639 {
640 const char *last_slash = strrchr (base_url->path, '/');
641 guint len, total_len;
642 guint location_len = location_end - location;
643 if (!last_slash)
644 len = strlen (base_url->path);
645 else
646 len = last_slash - base_url->path;
647
648 /* TODO: deal with '.' and '..' */
649
650 total_len = len + 1 + location_len;
651 path = g_alloca (total_len + 1);
652 memcpy (path, base_url->path, len);
653 path[len] = '/';
654 memcpy (path + len + 1, location, location_len);
655 path[len + 1 + location_len] = '\0';
656 }
657 if (path && !gsk_url_is_valid_path (path, &bad_char))
658 {
659 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
660 "URL %s constructed uses unallowed character '%c' (0x%02x)",
661 "relative path", bad_char, bad_char);
662 return NULL;
663 }
664 if (query && !gsk_url_is_valid_query (query, &bad_char))
665 {
666 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
667 "URL %s constructed uses unallowed character '%c' (0x%02x)",
668 "query", bad_char, bad_char);
669 return NULL;
670 }
671 if (fragment && !gsk_url_is_valid_fragment (fragment, &bad_char))
672 {
673 g_set_error (error, GSK_G_ERROR_DOMAIN, GSK_ERROR_INVALID_ARGUMENT,
674 "URL %s constructed uses unallowed character '%c' (0x%02x)",
675 "fragment", bad_char, bad_char);
676 return NULL;
677 }
678 rv = gsk_url_new_from_parts (base_url->scheme,
679 base_url->host,
680 base_url->port,
681 base_url->user_name,
682 base_url->password,
683 path, query, fragment);
684 if (!url_check_is_valid (rv, error))
685 {
686 g_object_unref (rv);
687 return NULL;
688 }
689 return rv;
690 }
691 }
692
693 /**
694 * gsk_url_to_string:
695 * @url: the URL to stringify.
696 *
697 * Convert the URL to a string.
698 *
699 * returns: the newly allocated string.
700 */
701 char *
gsk_url_to_string(const GskUrl * url)702 gsk_url_to_string (const GskUrl *url)
703 {
704 guint len = strlen (url->scheme_name)
705 + 4 /* :/// (max) */
706 + (url->host ? strlen (url->host) : 0)
707 + 3
708 + (url->password ? strlen (url->password) : 0)
709 + 10 /* port */
710 + (url->user_name ? strlen (url->user_name) : 0)
711 + 1
712 + (url->path ? strlen (url->path) : 0)
713 + 1
714 + (url->query ? strlen (url->query) : 0)
715 + 1
716 + (url->fragment ? strlen (url->fragment) : 0)
717 + 10 /* extra! */
718 ;
719 char *rv = g_malloc (len);
720 char *at = rv;
721 #define ADD_STR(str) \
722 G_STMT_START{ strcpy(at,str); at=strchr(at,0); }G_STMT_END
723 #define ADD_CHAR(c) \
724 G_STMT_START{ *at++ = c; }G_STMT_END
725 ADD_STR (url->scheme_name);
726 if (url->scheme == GSK_URL_SCHEME_FILE)
727 ADD_STR ("://"); /* note: the path typically includes one more '/' */
728 else if (url->host != NULL)
729 ADD_STR ("://");
730 else
731 ADD_STR (":");
732 if (url->user_name)
733 {
734 ADD_STR (url->user_name);
735 if (url->password)
736 {
737 ADD_CHAR (':');
738 ADD_STR (url->password);
739 }
740 ADD_CHAR ('@');
741 }
742 if (url->host)
743 {
744 ADD_STR (url->host);
745 }
746 if (url->port)
747 {
748 char buf[64];
749 g_snprintf(buf,sizeof(buf),":%u", url->port);
750 ADD_STR (buf);
751 }
752 if (url->path)
753 ADD_STR (url->path);
754 if (url->query)
755 {
756 ADD_CHAR ('?');
757 ADD_STR (url->query);
758 }
759 if (url->fragment)
760 {
761 ADD_CHAR ('#');
762 ADD_STR (url->fragment);
763 }
764 *at = 0;
765
766 return rv;
767 }
768
769 /**
770 * gsk_url_get_port:
771 * @url: the URL whose port is desired.
772 *
773 * Returns the port. If the port is 0, the default port
774 * for the type of scheme is returned (80 for HTTP, 21 for FTP
775 * and 443 for HTTP/SSL). If no default exists, 0 is returned.
776 *
777 * returns: the port as an integer, or 0 if no port could be computed.
778 */
779 guint
gsk_url_get_port(const GskUrl * url)780 gsk_url_get_port (const GskUrl *url)
781 {
782 if (url->port == 0)
783 {
784 switch (url->scheme)
785 {
786 case GSK_URL_SCHEME_HTTP:
787 return 80;
788 case GSK_URL_SCHEME_HTTPS:
789 return 443;
790 case GSK_URL_SCHEME_FTP:
791 return 21;
792
793 case GSK_URL_SCHEME_FILE:
794 case GSK_URL_SCHEME_OTHER:
795 return 0;
796 }
797 }
798 return url->port;
799 }
800
/* --- arguments --- */
/* Ids of the GObject properties installed by gsk_url_class_init() and
 * dispatched on by gsk_url_get_property() / gsk_url_set_property().
 * PROP_0 is a placeholder that is never installed. */
enum
{
  PROP_0,
  PROP_HOST,
  PROP_PASSWORD,
  PROP_PORT,
  PROP_USER_NAME,
  PROP_PATH,
  PROP_QUERY,
  PROP_FRAGMENT,
};
813
814 static void
gsk_url_get_property(GObject * object,guint property_id,GValue * value,GParamSpec * pspec)815 gsk_url_get_property (GObject *object,
816 guint property_id,
817 GValue *value,
818 GParamSpec *pspec)
819 {
820 GskUrl *url = GSK_URL (object);
821 switch (property_id)
822 {
823 case PROP_HOST:
824 g_value_set_string (value, url->host);
825 break;
826 case PROP_PASSWORD:
827 g_value_set_string (value, url->password);
828 break;
829 case PROP_PORT:
830 g_value_set_uint (value, gsk_url_get_port (url));
831 break;
832 case PROP_USER_NAME:
833 g_value_set_string (value, url->user_name);
834 break;
835 case PROP_PATH:
836 g_value_set_string (value, url->path);
837 break;
838 case PROP_QUERY:
839 g_value_set_string (value, url->query);
840 break;
841 case PROP_FRAGMENT:
842 g_value_set_string (value, url->fragment);
843 break;
844 default:
845 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
846 break;
847 }
848 }
849
850
851 static void
gsk_url_set_property(GObject * object,guint property_id,const GValue * value,GParamSpec * pspec)852 gsk_url_set_property (GObject *object,
853 guint property_id,
854 const GValue *value,
855 GParamSpec *pspec)
856 {
857 GskUrl *url = GSK_URL (object);
858 switch (property_id)
859 {
860 case PROP_HOST:
861 g_free (url->host);
862 url->host = g_strdup (g_value_get_string (value));
863 break;
864 case PROP_PASSWORD:
865 g_free (url->password);
866 url->password = g_strdup (g_value_get_string (value));
867 break;
868 case PROP_PORT:
869 url->port = g_value_get_uint (value);
870 break;
871 case PROP_USER_NAME:
872 g_free (url->user_name);
873 url->user_name = g_strdup (g_value_get_string (value));
874 break;
875 case PROP_PATH:
876 g_free (url->path);
877 url->path = g_strdup (g_value_get_string (value));
878 break;
879 case PROP_QUERY:
880 g_free (url->query);
881 url->query = g_strdup (g_value_get_string (value));
882 break;
883 case PROP_FRAGMENT:
884 g_free (url->fragment);
885 url->fragment = g_strdup (g_value_get_string (value));
886 break;
887 default:
888 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
889 break;
890 }
891 }
892
/* Instance initializer: a new GskUrl starts out with the scheme
 * GSK_URL_SCHEME_OTHER.  No other field is assigned here; the
 * constructors in this file fill in the rest. */
static void
gsk_url_init (GskUrl *url)
{
  url->scheme = GSK_URL_SCHEME_OTHER;
}
898
899 static void
gsk_url_class_init(GskUrlClass * class)900 gsk_url_class_init (GskUrlClass *class)
901 {
902 GObjectClass *object_class = G_OBJECT_CLASS (class);
903 GParamSpec *pspec;
904 parent_class = g_type_class_peek_parent (class);
905 object_class->set_property = gsk_url_set_property;
906 object_class->get_property = gsk_url_get_property;
907 object_class->finalize = gsk_url_finalize;
908 pspec = g_param_spec_string ("host",
909 _("Host Name"),
910 _("name of host having resource"),
911 NULL,
912 G_PARAM_READWRITE);
913 g_object_class_install_property (object_class, PROP_HOST, pspec);
914
915 pspec = g_param_spec_string ("password",
916 _("Password"),
917 _("password protecting resource"),
918 NULL,
919 G_PARAM_READWRITE);
920 g_object_class_install_property (object_class, PROP_PASSWORD, pspec);
921
922 pspec = g_param_spec_uint ("port",
923 _("Port"),
924 _("port for resource (or 0 for default)"),
925 0, 65536, 0,
926 G_PARAM_READWRITE);
927 g_object_class_install_property (object_class, PROP_PORT, pspec);
928
929 pspec = g_param_spec_string ("user-name",
930 _("Username"),
931 _("username for resource"),
932 NULL,
933 G_PARAM_READWRITE);
934 g_object_class_install_property (object_class, PROP_USER_NAME, pspec);
935
936 pspec = g_param_spec_string ("path",
937 _("Path"),
938 _("Path on the server to the resource"),
939 NULL,
940 G_PARAM_READWRITE);
941 g_object_class_install_property (object_class, PROP_PATH, pspec);
942
943 pspec = g_param_spec_string ("query",
944 _("Query"),
945 _("Query (for HTTP resources)"),
946 NULL,
947 G_PARAM_READWRITE);
948 g_object_class_install_property (object_class, PROP_QUERY, pspec);
949
950 pspec = g_param_spec_string ("fragment",
951 _("Fragment"),
952 _("Fragment (for HTTP resources)"),
953 NULL,
954 G_PARAM_READWRITE);
955 g_object_class_install_property (object_class, PROP_FRAGMENT, pspec);
956 }
957
958 GType
gsk_url_get_type()959 gsk_url_get_type()
960 {
961 static GType url_type = 0;
962 if (!url_type)
963 {
964 static const GTypeInfo url_info =
965 {
966 sizeof(GskUrlClass),
967 (GBaseInitFunc) NULL,
968 (GBaseFinalizeFunc) NULL,
969 (GClassInitFunc) gsk_url_class_init,
970 NULL, /* class_finalize */
971 NULL, /* class_data */
972 sizeof (GskUrl),
973 0, /* n_preallocs */
974 (GInstanceInitFunc) gsk_url_init,
975 NULL /* value_table */
976 };
977 url_type = g_type_register_static (G_TYPE_OBJECT,
978 "GskUrl",
979 &url_info, 0);
980 }
981 return url_type;
982 }
983
984 /**
985 * gsk_url_hash:
986 * @url: a url.
987 *
988 * Compute a randomish hash code based on the URL.
989 *
990 * You can create a GHashTable that's keyed off of URLs with:
991 * g_hash_table_new((GHashFunc)gsk_url_hash,
992 * (GEqualFunc)gsk_url_equal);
993 *
994 * returns: the hash code.
995 */
gsk_url_hash(const GskUrl * url)996 guint gsk_url_hash (const GskUrl *url)
997 {
998 guint rv = 0;
999 rv += g_str_hash (url->scheme_name);
1000 if (url->host)
1001 rv += 33 * g_str_hash (url->host);
1002 if (url->password)
1003 rv += 1001 * g_str_hash (url->password);
1004 rv += 11 * url->port;
1005 if (url->user_name)
1006 rv ^= g_str_hash (url->user_name);
1007 if (url->path)
1008 rv ^= 101 * g_str_hash (url->path);
1009 if (url->query)
1010 rv ^= 10009 * g_str_hash (url->query);
1011 if (url->fragment)
1012 rv += 100001 * g_str_hash (url->fragment);
1013 return rv;
1014 }
safe_strs_equal(const char * a,const char * b)1015 static inline gboolean safe_strs_equal (const char *a, const char *b)
1016 {
1017 if (a == NULL && b == NULL)
1018 return TRUE;
1019 if (a == NULL || b == NULL)
1020 return FALSE;
1021 return strcmp (a,b) == 0;
1022 }
1023
1024 /**
1025 * gsk_url_equal:
1026 * @a: a url.
1027 * @b: another url.
1028 *
1029 * Test to see if two URLs are the same.
1030 *
1031 * returns: whether the URLs are the same.
1032 */
gsk_url_equal(const GskUrl * a,const GskUrl * b)1033 gboolean gsk_url_equal (const GskUrl *a,
1034 const GskUrl *b)
1035 {
1036 return safe_strs_equal (a->scheme_name, b->scheme_name)
1037 && safe_strs_equal (a->host, b->host)
1038 && safe_strs_equal (a->password, b->password)
1039 && a->port == b->port
1040 && safe_strs_equal (a->user_name, b->user_name)
1041 && safe_strs_equal (a->path, b->path)
1042 && safe_strs_equal (a->query, b->query)
1043 && safe_strs_equal (a->fragment, b->fragment);
1044 }
1045
1046 /*
1047 * True if the ascii character c should be escaped within a URI.
1048 * See RFC 2396, section 2.
1049 *
1050 * According to section 2.4: "data must be escaped if it does not have a
1051 * representation using an unreserved character," where unreserved
1052 * characters are (section 2.3): "upper and lower case letters, decimal
1053 * digits, and a limited set of punctuation marks and symbols" [see below].
1054 */
1055
1056 static guint8 should_be_escaped_data[16] =
1057 {
1058 0xff, 0xff, 0xff, 0xff, 0x7d, 0x98, 0x00, 0xfc,
1059 0x01, 0x00, 0x00, 0x78, 0x01, 0x00, 0x00, 0xb8,
1060 };
1061 static inline gboolean
should_be_escaped(char c)1062 should_be_escaped (char c)
1063 {
1064 if (c & 0x80)
1065 return TRUE;
1066 return (should_be_escaped_data[c>>3] & (1<<(7&c))) != 0;
1067 }
1068
/* Lower-case hexadecimal digits, indexed by nibble value (0..15);
 * used by the encoders below to emit %xx escapes. */
static const char *hex_characters = "0123456789abcdef";
1070
1071 /**
1072 * gsk_url_encode:
1073 * @decoded: decoded data to escape.
1074 *
1075 * Encode characters to be passed in a URL.
1076 * Basically, "unsafe" characters are converted
1077 * to %xx where 'x' is a hexidecimal digit.
1078 *
1079 * See RFC 2396 Section 2.
1080 *
1081 * returns: a newly allocated string.
1082 */
1083 char *
gsk_url_encode(const char * raw)1084 gsk_url_encode (const char *raw)
1085 {
1086 int length = 0;
1087 const char *at;
1088 char *out;
1089 char *rv;
1090 for (at = raw; *at != '\0'; at++)
1091 if (should_be_escaped (*at))
1092 length += 3;
1093 else
1094 length += 1;
1095 rv = g_new (char, length + 1);
1096 out = rv;
1097 for (at = raw; *at != '\0'; at++)
1098 if (should_be_escaped (*at))
1099 {
1100 *out++ = '%';
1101 *out++ = hex_characters [((guint8) *at) >> 4];
1102 *out++ = hex_characters [((guint8) *at) & 0xf];
1103 }
1104 else
1105 {
1106 *out = *at;
1107 out++;
1108 }
1109 *out = '\0';
1110 return rv;
1111 }
1112
1113 /**
1114 * gsk_url_decode:
1115 * @encoded: encoded URL to convert to plaintext.
1116 *
1117 * Decode characters to be passed in a URL.
1118 * Basically, any %xx string is changed to the
1119 * character whose ASCII code is xx, treating xx as
1120 * a hexidecimal 2-digit number.
1121 *
1122 * See RFC ??, Section ??.
1123 *
1124 * returns: a newly allocated string.
1125 */
1126 char *
gsk_url_decode(const char * encoded)1127 gsk_url_decode (const char *encoded)
1128 {
1129 const char *at = encoded;
1130 int length = 0;
1131 char *rv;
1132 char *out;
1133 while (*at != '\0')
1134 {
1135 if (*at == '%')
1136 {
1137 if (at[1] == '\0' || at[2] == '\0')
1138 {
1139 g_warning ("malformed URL encoded string");
1140 return NULL;
1141 }
1142 at += 3;
1143 length++;
1144 }
1145 else
1146 {
1147 at++;
1148 length++;
1149 }
1150 }
1151 rv = g_new (char, length + 1);
1152 out = rv;
1153 at = encoded;
1154 while (*at != '\0')
1155 {
1156 if (*at == '%')
1157 {
1158 char hex[3];
1159 hex[0] = at[1];
1160 hex[1] = at[2];
1161 hex[2] = '\0';
1162 if (at[1] == '\0' || at[2] == '\0')
1163 return NULL;
1164 at += 3;
1165 *out++ = (char) strtol (hex, NULL, 16);
1166 }
1167 else
1168 {
1169 *out++ = *at++;
1170 length++;
1171 }
1172 }
1173 *out = '\0';
1174 return rv;
1175 }
1176
1177 /**
1178 * gsk_url_encode_http:
1179 * @decoded: the raw url text; this is treated as raw 8-bit data,
1180 * not UTF-8.
1181 *
1182 * Do what is typically thought of
1183 * as "url encoding" in http-land... namely SPACE maps to '+'
1184 * and funny characters are encoded
1185 * as %xx where 'x' denotes a single hex-digit.
1186 *
1187 * returns: a newly allocated encoded string that the caller
1188 * must free.
1189 */
1190 char *
gsk_url_encode_http(const char * decoded)1191 gsk_url_encode_http (const char *decoded)
1192 {
1193 const char *at;
1194 guint len = 0;
1195 char *rv;
1196 char *rv_at;
1197 for (at = decoded; *at != '\0'; at++)
1198 {
1199 if (*at != ' ' && should_be_escaped (*at))
1200 len += 3;
1201 else
1202 len++;
1203 }
1204
1205 rv = g_malloc (len + 1);
1206 rv_at = rv;
1207 for (at = decoded; *at != '\0'; at++)
1208 {
1209 if (*at == ' ')
1210 *rv_at++ = '+';
1211 else if (should_be_escaped (*at))
1212 {
1213 *rv_at++ = '%';
1214 *rv_at++ = hex_characters [((guint8) *at) >> 4];
1215 *rv_at++ = hex_characters [((guint8) *at) & 0xf];
1216 }
1217 else
1218 *rv_at++ = *at;
1219 }
1220 *rv_at = '\0';
1221 return rv;
1222 }
1223
1224 /**
1225 * gsk_url_encode_http_binary:
1226 * @decoded: the raw binary data: may contain NULs.
1227 * @length: length of the binary data, in bytes.
1228 *
1229 * Do what is typically thought of
1230 * as "url encoding" in http-land... namely SPACE maps to '+'
1231 * and funny characters are encoded
1232 * as %xx where 'x' denotes a single hex-digit.
1233 *
1234 * returns: a newly allocated encoded string that the caller
1235 * must free.
1236 */
1237 char *
gsk_url_encode_http_binary(const guint8 * decoded,guint length)1238 gsk_url_encode_http_binary (const guint8 *decoded,
1239 guint length)
1240 {
1241 guint rv_len = length;
1242 char *rv;
1243 char *at;
1244 guint i;
1245 for (i = 0; i < length; i++)
1246 if (should_be_escaped (decoded[i]))
1247 rv_len += 2;
1248 rv = g_malloc (rv_len + 1);
1249 at = rv;
1250 for (i = 0; i < length; i++)
1251 if (should_be_escaped (decoded[i]))
1252 {
1253 *at++ = '%';
1254 *at++ = hex_characters[decoded[i] >> 4];
1255 *at++ = hex_characters[decoded[i] & 0xf];
1256 }
1257 else
1258 *at++ = decoded[i];
1259 *at = 0;
1260 return rv;
1261 }
1262
1263 /**
1264 * gsk_url_decode_http:
1265 * @encoded: the encoded url text.
1266 *
1267 * Do what is typically thought of
1268 * as "url decoding" in http-land... namely '+' maps to SPACE
1269 * and %xx, where 'x' denotes a single hex-digit, maps to the character
1270 * given as hexidecimal. (warning: the resulting string is not UTF-8)
1271 *
1272 * returns: a newly allocated encoded string that the caller
1273 * must free (the empty string "" when unable to decode hex).
1274 */
1275 char *
gsk_url_decode_http(const char * encoded)1276 gsk_url_decode_http (const char *encoded)
1277 {
1278 const char *at;
1279 guint len = 0;
1280 char *rv;
1281 char *rv_at;
1282 for (at = encoded; *at != '\0'; at++)
1283 {
1284 if (*at == '%')
1285 {
1286 at++;
1287 if (!isxdigit(*at))
1288 return g_strdup ("");
1289 at++;
1290 if (!isxdigit(*at))
1291 return g_strdup ("");
1292 len++;
1293 }
1294 else
1295 {
1296 len++;
1297 }
1298 }
1299 rv = g_malloc (len + 1);
1300 rv_at = rv;
1301 for (at = encoded; *at != '\0'; at++)
1302 {
1303 if (*at == '%')
1304 {
1305 char hex[3];
1306 hex[0] = *(++at);
1307 hex[1] = *(++at);
1308 hex[2] = 0;
1309 *rv_at++ = (char) strtol (hex, NULL, 16);
1310 }
1311 else if (*at == '+')
1312 *rv_at++ = ' ';
1313 else
1314 *rv_at++ = *at;
1315 }
1316 *rv_at = '\0';
1317 return rv;
1318 }
1319
1320 /* gsk_url_split_form_urlencoded:
1321 * @encoded_query: the encoded form data
1322 *
1323 * Split an "application/x-www-form-urlencoded"
1324 * format query string into key-value pairs.
1325 *
1326 * See RFC 1866, section 8.2.1.
1327 *
1328 * returns: a null-terminated array of strings: key, value, ... NULL.
1329 * Caller must free result with g_strfreev.
1330 */
1331 char **
gsk_url_split_form_urlencoded(const char * encoded_query)1332 gsk_url_split_form_urlencoded (const char *encoded_query)
1333 {
1334 enum { START, GOT_OTHER, GOT_EQUALS, INVALID } state = START;
1335 guint num_pairs = 0;
1336 const char *query_at;
1337 char **rv, **rv_at;
1338 char *copy, *copy_at;
1339 const char *name = "", *value = "";
1340
1341 g_return_val_if_fail (encoded_query, NULL);
1342
1343 /* Scan for valid pairs:
1344 * one more more [^&=]; =; zero or more [^&=]; & or end.
1345 */
1346 for (query_at = encoded_query; ; ++query_at)
1347 switch (*query_at)
1348 {
1349 case '\0':
1350 if (state == GOT_EQUALS)
1351 ++num_pairs;
1352 goto DONE_SCANNING;
1353 case '&':
1354 if (state == GOT_EQUALS)
1355 ++num_pairs;
1356 state = START;
1357 break;
1358 case '=':
1359 state = GOT_OTHER ? GOT_EQUALS : INVALID;
1360 break;
1361 default:
1362 if (state == START)
1363 state = GOT_OTHER;
1364 break;
1365 }
1366 DONE_SCANNING:
1367 /* num_pairs * (name, value) + terminating NULL */
1368 rv = g_new (gchar *, (num_pairs << 1) + 1);
1369
1370 copy = g_strdup (encoded_query);
1371 for (state = START, rv_at = rv, copy_at = copy; ; ++copy_at)
1372 switch (*copy_at)
1373 {
1374 case '\0':
1375 if (state == GOT_EQUALS)
1376 {
1377 *rv_at++ = gsk_url_decode_http (name);
1378 *rv_at++ = gsk_url_decode_http (value);
1379 }
1380 goto DONE;
1381 case '&':
1382 if (state == GOT_EQUALS)
1383 {
1384 *copy_at = 0;
1385 *rv_at++ = gsk_url_decode_http (name);
1386 *rv_at++ = gsk_url_decode_http (value);
1387 }
1388 state = START;
1389 break;
1390 case '=':
1391 if (state == GOT_OTHER)
1392 {
1393 state = GOT_EQUALS;
1394 *copy_at = 0;
1395 value = copy_at + 1;
1396 }
1397 else
1398 state = INVALID;
1399 break;
1400 default:
1401 if (state == START)
1402 {
1403 state = GOT_OTHER;
1404 name = copy_at;
1405 }
1406 break;
1407 }
1408 DONE:
1409 g_free (copy);
1410 *rv_at = NULL;
1411 return rv;
1412 }
1413