1 /* GLIB - Library of useful routines for C programming
2 * Copyright © 2020 Red Hat, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General
15 * Public License along with this library; if not, see
16 * <http://www.gnu.org/licenses/>.
17 */
18
19 #include "config.h"
20
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "glib.h"
25 #include "glibintl.h"
26 #include "guriprivate.h"
27
28 /**
29 * SECTION:guri
30 * @short_description: URI-handling utilities
31 * @include: glib.h
32 *
33 * The #GUri type and related functions can be used to parse URIs into
34 * their components, and build valid URIs from individual components.
35 *
36 * Note that #GUri scope is to help manipulate URIs in various applications,
37 * following [RFC 3986](https://tools.ietf.org/html/rfc3986). In particular,
38 * it doesn't intend to cover web browser needs, and doesn't implement the
39 * [WHATWG URL](https://url.spec.whatwg.org/) standard. No APIs are provided to
40 * help prevent
41 * [homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack), so
 * #GUri is not suitable for formatting URIs that are displayed to the user
 * as a basis for security-sensitive decisions.
44 *
45 * ## Relative and absolute URIs # {#relative-absolute-uris}
46 *
47 * As defined in [RFC 3986](https://tools.ietf.org/html/rfc3986#section-4), the
48 * hierarchical nature of URIs means that they can either be ‘relative
49 * references’ (sometimes referred to as ‘relative URIs’) or ‘URIs’ (for
50 * clarity, ‘URIs’ are referred to in this documentation as
51 * ‘absolute URIs’ — although
 * [in contrast to RFC 3986](https://tools.ietf.org/html/rfc3986#section-4.3),
53 * fragment identifiers are always allowed).
54 *
55 * Relative references have one or more components of the URI missing. In
56 * particular, they have no scheme. Any other component, such as hostname,
57 * query, etc. may be missing, apart from a path, which has to be specified (but
58 * may be empty). The path may be relative, starting with `./` rather than `/`.
59 *
60 * For example, a valid relative reference is `./path?query`,
61 * `/?query#fragment` or `//example.com`.
62 *
63 * Absolute URIs have a scheme specified. Any other components of the URI which
64 * are missing are specified as explicitly unset in the URI, rather than being
65 * resolved relative to a base URI using g_uri_parse_relative().
66 *
67 * For example, a valid absolute URI is `file:///home/bob` or
68 * `https://search.com?query=string`.
69 *
70 * A #GUri instance is always an absolute URI. A string may be an absolute URI
71 * or a relative reference; see the documentation for individual functions as to
72 * what forms they accept.
73 *
74 * ## Parsing URIs
75 *
76 * The most minimalist APIs for parsing URIs are g_uri_split() and
77 * g_uri_split_with_user(). These split a URI into its component
78 * parts, and return the parts; the difference between the two is that
79 * g_uri_split() treats the ‘userinfo’ component of the URI as a
80 * single element, while g_uri_split_with_user() can (depending on the
81 * #GUriFlags you pass) treat it as containing a username, password,
82 * and authentication parameters. Alternatively, g_uri_split_network()
83 * can be used when you are only interested in the components that are
84 * needed to initiate a network connection to the service (scheme,
85 * host, and port).
86 *
87 * g_uri_parse() is similar to g_uri_split(), but instead of returning
88 * individual strings, it returns a #GUri structure (and it requires
89 * that the URI be an absolute URI).
90 *
91 * g_uri_resolve_relative() and g_uri_parse_relative() allow you to
92 * resolve a relative URI relative to a base URI.
93 * g_uri_resolve_relative() takes two strings and returns a string,
94 * and g_uri_parse_relative() takes a #GUri and a string and returns a
95 * #GUri.
96 *
97 * All of the parsing functions take a #GUriFlags argument describing
98 * exactly how to parse the URI; see the documentation for that type
99 * for more details on the specific flags that you can pass. If you
100 * need to choose different flags based on the type of URI, you can
101 * use g_uri_peek_scheme() on the URI string to check the scheme
102 * first, and use that to decide what flags to parse it with.
103 *
104 * For example, you might want to use %G_URI_PARAMS_WWW_FORM when parsing the
105 * params for a web URI, so compare the result of g_uri_peek_scheme() against
106 * `http` and `https`.
107 *
108 * ## Building URIs
109 *
110 * g_uri_join() and g_uri_join_with_user() can be used to construct
111 * valid URI strings from a set of component strings. They are the
112 * inverse of g_uri_split() and g_uri_split_with_user().
113 *
114 * Similarly, g_uri_build() and g_uri_build_with_user() can be used to
115 * construct a #GUri from a set of component strings.
116 *
117 * As with the parsing functions, the building functions take a
118 * #GUriFlags argument. In particular, it is important to keep in mind
119 * whether the URI components you are using are already `%`-encoded. If so,
120 * you must pass the %G_URI_FLAGS_ENCODED flag.
121 *
122 * ## `file://` URIs
123 *
124 * Note that Windows and Unix both define special rules for parsing
125 * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
126 * interpretation of path separators on Windows). #GUri does not
127 * implement these rules. Use g_filename_from_uri() and
128 * g_filename_to_uri() if you want to properly convert between
129 * `file://` URIs and local filenames.
130 *
131 * ## URI Equality
132 *
133 * Note that there is no `g_uri_equal ()` function, because comparing
134 * URIs usefully requires scheme-specific knowledge that #GUri does
 * not have. #GUri can help with normalization if you use the various
 * encoded #GUriFlags as well as %G_URI_FLAGS_SCHEME_NORMALIZE; however,
 * it is not comprehensive.
138 * For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
139 * thing according to the `data:` URI specification which GLib does not
140 * handle.
141 *
142 * Since: 2.66
143 */
144
145 /**
146 * GUri:
147 *
148 * A parsed absolute URI.
149 *
150 * Since #GUri only represents absolute URIs, all #GUris will have a
151 * URI scheme, so g_uri_get_scheme() will always return a non-%NULL
152 * answer. Likewise, by definition, all URIs have a path component, so
153 * g_uri_get_path() will always return a non-%NULL string (which may be empty).
154 *
155 * If the URI string has an
156 * [‘authority’ component](https://tools.ietf.org/html/rfc3986#section-3) (that
157 * is, if the scheme is followed by `://` rather than just `:`), then the
158 * #GUri will contain a hostname, and possibly a port and ‘userinfo’.
159 * Additionally, depending on how the #GUri was constructed/parsed (for example,
160 * using the %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS flags),
161 * the userinfo may be split out into a username, password, and
162 * additional authorization-related parameters.
163 *
164 * Normally, the components of a #GUri will have all `%`-encoded
165 * characters decoded. However, if you construct/parse a #GUri with
166 * %G_URI_FLAGS_ENCODED, then the `%`-encoding will be preserved instead in
167 * the userinfo, path, and query fields (and in the host field if also
168 * created with %G_URI_FLAGS_NON_DNS). In particular, this is necessary if
169 * the URI may contain binary data or non-UTF-8 text, or if decoding
170 * the components might change the interpretation of the URI.
171 *
172 * For example, with the encoded flag:
173 *
174 * |[<!-- language="C" -->
175 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err);
176 * g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue");
177 * ]|
178 *
179 * While the default `%`-decoding behaviour would give:
180 *
181 * |[<!-- language="C" -->
182 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err);
183 * g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value");
184 * ]|
185 *
186 * During decoding, if an invalid UTF-8 string is encountered, parsing will fail
187 * with an error indicating the bad string location:
188 *
189 * |[<!-- language="C" -->
190 * g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err);
191 * g_assert_error (err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
192 * ]|
193 *
194 * You should pass %G_URI_FLAGS_ENCODED or %G_URI_FLAGS_ENCODED_QUERY if you
195 * need to handle that case manually. In particular, if the query string
 * contains `=` characters that are `%`-encoded, you should let
 * g_uri_parse_params() do the decoding of the query, so that it is decoded
 * only once.
198 *
199 * #GUri is immutable once constructed, and can safely be accessed from
200 * multiple threads. Its reference counting is atomic.
201 *
202 * Since: 2.66
203 */
204 struct _GUri {
205 gchar *scheme;
206 gchar *userinfo;
207 gchar *host;
208 gint port;
209 gchar *path;
210 gchar *query;
211 gchar *fragment;
212
213 gchar *user;
214 gchar *password;
215 gchar *auth_params;
216
217 GUriFlags flags;
218 };
219
220 /**
221 * g_uri_ref: (skip)
222 * @uri: a #GUri
223 *
224 * Increments the reference count of @uri by one.
225 *
226 * Returns: @uri
227 *
228 * Since: 2.66
229 */
230 GUri *
g_uri_ref(GUri * uri)231 g_uri_ref (GUri *uri)
232 {
233 g_return_val_if_fail (uri != NULL, NULL);
234
235 return g_atomic_rc_box_acquire (uri);
236 }
237
238 static void
g_uri_clear(GUri * uri)239 g_uri_clear (GUri *uri)
240 {
241 g_free (uri->scheme);
242 g_free (uri->userinfo);
243 g_free (uri->host);
244 g_free (uri->path);
245 g_free (uri->query);
246 g_free (uri->fragment);
247 g_free (uri->user);
248 g_free (uri->password);
249 g_free (uri->auth_params);
250 }
251
252 /**
253 * g_uri_unref: (skip)
254 * @uri: a #GUri
255 *
256 * Atomically decrements the reference count of @uri by one.
257 *
258 * When the reference count reaches zero, the resources allocated by
259 * @uri are freed
260 *
261 * Since: 2.66
262 */
263 void
g_uri_unref(GUri * uri)264 g_uri_unref (GUri *uri)
265 {
266 g_return_if_fail (uri != NULL);
267
268 g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear);
269 }
270
271 static gboolean
g_uri_char_is_unreserved(gchar ch)272 g_uri_char_is_unreserved (gchar ch)
273 {
274 if (g_ascii_isalnum (ch))
275 return TRUE;
276 return ch == '-' || ch == '.' || ch == '_' || ch == '~';
277 }
278
279 #define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
280 #define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
281
282 static gssize
uri_decoder(gchar ** out,const gchar * illegal_chars,const gchar * start,gsize length,gboolean just_normalize,gboolean www_form,GUriFlags flags,GUriError parse_error,GError ** error)283 uri_decoder (gchar **out,
284 const gchar *illegal_chars,
285 const gchar *start,
286 gsize length,
287 gboolean just_normalize,
288 gboolean www_form,
289 GUriFlags flags,
290 GUriError parse_error,
291 GError **error)
292 {
293 gchar c;
294 GString *decoded;
295 const gchar *invalid, *s, *end;
296 gssize len;
297
298 if (!(flags & G_URI_FLAGS_ENCODED))
299 just_normalize = FALSE;
300
301 decoded = g_string_sized_new (length + 1);
302 for (s = start, end = s + length; s < end; s++)
303 {
304 if (*s == '%')
305 {
306 if (s + 2 >= end ||
307 !g_ascii_isxdigit (s[1]) ||
308 !g_ascii_isxdigit (s[2]))
309 {
310 /* % followed by non-hex or the end of the string; this is an error */
311 if (!(flags & G_URI_FLAGS_PARSE_RELAXED))
312 {
313 g_set_error_literal (error, G_URI_ERROR, parse_error,
314 /* xgettext: no-c-format */
315 _("Invalid %-encoding in URI"));
316 g_string_free (decoded, TRUE);
317 return -1;
318 }
319
320 /* In non-strict mode, just let it through; we *don't*
321 * fix it to "%25", since that might change the way that
322 * the URI's owner would interpret it.
323 */
324 g_string_append_c (decoded, *s);
325 continue;
326 }
327
328 c = HEXCHAR (s);
329 if (illegal_chars && strchr (illegal_chars, c))
330 {
331 g_set_error_literal (error, G_URI_ERROR, parse_error,
332 _("Illegal character in URI"));
333 g_string_free (decoded, TRUE);
334 return -1;
335 }
336 if (just_normalize && !g_uri_char_is_unreserved (c))
337 {
338 /* Leave the % sequence there but normalize it. */
339 g_string_append_c (decoded, *s);
340 g_string_append_c (decoded, g_ascii_toupper (s[1]));
341 g_string_append_c (decoded, g_ascii_toupper (s[2]));
342 s += 2;
343 }
344 else
345 {
346 g_string_append_c (decoded, c);
347 s += 2;
348 }
349 }
350 else if (www_form && *s == '+')
351 g_string_append_c (decoded, ' ');
352 /* Normalize any illegal characters. */
353 else if (just_normalize && (!g_ascii_isgraph (*s)))
354 g_string_append_printf (decoded, "%%%02X", (guchar)*s);
355 else
356 g_string_append_c (decoded, *s);
357 }
358
359 len = decoded->len;
360 g_assert (len >= 0);
361
362 if (!(flags & G_URI_FLAGS_ENCODED) &&
363 !g_utf8_validate (decoded->str, len, &invalid))
364 {
365 g_set_error_literal (error, G_URI_ERROR, parse_error,
366 _("Non-UTF-8 characters in URI"));
367 g_string_free (decoded, TRUE);
368 return -1;
369 }
370
371 if (out)
372 *out = g_string_free (decoded, FALSE);
373 else
374 g_string_free (decoded, TRUE);
375
376 return len;
377 }
378
379 static gboolean
uri_decode(gchar ** out,const gchar * illegal_chars,const gchar * start,gsize length,gboolean www_form,GUriFlags flags,GUriError parse_error,GError ** error)380 uri_decode (gchar **out,
381 const gchar *illegal_chars,
382 const gchar *start,
383 gsize length,
384 gboolean www_form,
385 GUriFlags flags,
386 GUriError parse_error,
387 GError **error)
388 {
389 return uri_decoder (out, illegal_chars, start, length, FALSE, www_form, flags,
390 parse_error, error) != -1;
391 }
392
393 static gboolean
uri_normalize(gchar ** out,const gchar * start,gsize length,GUriFlags flags,GUriError parse_error,GError ** error)394 uri_normalize (gchar **out,
395 const gchar *start,
396 gsize length,
397 GUriFlags flags,
398 GUriError parse_error,
399 GError **error)
400 {
401 return uri_decoder (out, NULL, start, length, TRUE, FALSE, flags,
402 parse_error, error) != -1;
403 }
404
405 static gboolean
is_valid(guchar c,const gchar * reserved_chars_allowed)406 is_valid (guchar c,
407 const gchar *reserved_chars_allowed)
408 {
409 if (g_uri_char_is_unreserved (c))
410 return TRUE;
411
412 if (reserved_chars_allowed && strchr (reserved_chars_allowed, c))
413 return TRUE;
414
415 return FALSE;
416 }
417
418 void
_uri_encoder(GString * out,const guchar * start,gsize length,const gchar * reserved_chars_allowed,gboolean allow_utf8)419 _uri_encoder (GString *out,
420 const guchar *start,
421 gsize length,
422 const gchar *reserved_chars_allowed,
423 gboolean allow_utf8)
424 {
425 static const gchar hex[16] = "0123456789ABCDEF";
426 const guchar *p = start;
427 const guchar *end = p + length;
428
429 while (p < end)
430 {
431 gunichar multibyte_utf8_char = 0;
432
433 if (allow_utf8 && *p >= 0x80)
434 multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
435
436 if (multibyte_utf8_char > 0 &&
437 multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
438 {
439 gint len = g_utf8_skip [*p];
440 g_string_append_len (out, (gchar *)p, len);
441 p += len;
442 }
443 else if (is_valid (*p, reserved_chars_allowed))
444 {
445 g_string_append_c (out, *p);
446 p++;
447 }
448 else
449 {
450 g_string_append_c (out, '%');
451 g_string_append_c (out, hex[*p >> 4]);
452 g_string_append_c (out, hex[*p & 0xf]);
453 p++;
454 }
455 }
456 }
457
458 /* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
459 * support IPv6 zone identifiers.
460 *
461 * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
462 * There’s no point supporting them until (a) they exist and (b) the rest of the
463 * stack (notably, sockets) supports them.
464 *
465 * Rules:
466 *
467 * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]"
468 *
469 * ZoneID = 1*( unreserved / pct-encoded )
470 *
471 * IPv6addrz = IPv6address "%25" ZoneID
472 *
473 * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
474 *
475 * IPv6addrz = IPv6address "%" ZoneID
476 */
477 static gboolean
parse_ip_literal(const gchar * start,gsize length,GUriFlags flags,gchar ** out,GError ** error)478 parse_ip_literal (const gchar *start,
479 gsize length,
480 GUriFlags flags,
481 gchar **out,
482 GError **error)
483 {
484 gchar *pct, *zone_id = NULL;
485 gchar *addr = NULL;
486 gsize addr_length = 0;
487 gsize zone_id_length = 0;
488 gchar *decoded_zone_id = NULL;
489
490 if (start[length - 1] != ']')
491 goto bad_ipv6_literal;
492
493 /* Drop the square brackets */
494 addr = g_strndup (start + 1, length - 2);
495 addr_length = length - 2;
496
497 /* If there's an IPv6 scope ID, split out the zone. */
498 pct = strchr (addr, '%');
499 if (pct != NULL)
500 {
501 *pct = '\0';
502
503 if (addr_length - (pct - addr) >= 4 &&
504 *(pct + 1) == '2' && *(pct + 2) == '5')
505 {
506 zone_id = pct + 3;
507 zone_id_length = addr_length - (zone_id - addr);
508 }
509 else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
510 addr_length - (pct - addr) >= 2)
511 {
512 zone_id = pct + 1;
513 zone_id_length = addr_length - (zone_id - addr);
514 }
515 else
516 goto bad_ipv6_literal;
517
518 g_assert (zone_id_length >= 1);
519 }
520
521 /* addr must be an IPv6 address */
522 if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
523 goto bad_ipv6_literal;
524
525 /* Zone ID must be valid. It can contain %-encoded characters. */
526 if (zone_id != NULL &&
527 !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
528 flags, G_URI_ERROR_BAD_HOST, NULL))
529 goto bad_ipv6_literal;
530
531 /* Success */
532 if (out != NULL && decoded_zone_id != NULL)
533 *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
534 else if (out != NULL)
535 *out = g_steal_pointer (&addr);
536
537 g_free (addr);
538 g_free (decoded_zone_id);
539
540 return TRUE;
541
542 bad_ipv6_literal:
543 g_free (addr);
544 g_free (decoded_zone_id);
545 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
546 _("Invalid IPv6 address ‘%.*s’ in URI"),
547 (gint)length, start);
548
549 return FALSE;
550 }
551
552 static gboolean
parse_host(const gchar * start,gsize length,GUriFlags flags,gchar ** out,GError ** error)553 parse_host (const gchar *start,
554 gsize length,
555 GUriFlags flags,
556 gchar **out,
557 GError **error)
558 {
559 gchar *decoded = NULL, *host;
560 gchar *addr = NULL;
561
562 if (*start == '[')
563 {
564 if (!parse_ip_literal (start, length, flags, &host, error))
565 return FALSE;
566 goto ok;
567 }
568
569 if (g_ascii_isdigit (*start))
570 {
571 addr = g_strndup (start, length);
572 if (g_hostname_is_ip_address (addr))
573 {
574 host = addr;
575 goto ok;
576 }
577 g_free (addr);
578 }
579
580 if (flags & G_URI_FLAGS_NON_DNS)
581 {
582 if (!uri_normalize (&decoded, start, length, flags,
583 G_URI_ERROR_BAD_HOST, error))
584 return FALSE;
585 host = g_steal_pointer (&decoded);
586 goto ok;
587 }
588
589 flags &= ~G_URI_FLAGS_ENCODED;
590 if (!uri_decode (&decoded, NULL, start, length, FALSE, flags,
591 G_URI_ERROR_BAD_HOST, error))
592 return FALSE;
593
594 /* You're not allowed to %-encode an IP address, so if it wasn't
595 * one before, it better not be one now.
596 */
597 if (g_hostname_is_ip_address (decoded))
598 {
599 g_free (decoded);
600 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
601 _("Illegal encoded IP address ‘%.*s’ in URI"),
602 (gint)length, start);
603 return FALSE;
604 }
605
606 if (g_hostname_is_non_ascii (decoded))
607 {
608 host = g_hostname_to_ascii (decoded);
609 if (host == NULL)
610 {
611 g_free (decoded);
612 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
613 _("Illegal internationalized hostname ‘%.*s’ in URI"),
614 (gint) length, start);
615 return FALSE;
616 }
617 }
618 else
619 {
620 host = g_steal_pointer (&decoded);
621 }
622
623 ok:
624 if (out)
625 *out = g_steal_pointer (&host);
626 g_free (host);
627 g_free (decoded);
628
629 return TRUE;
630 }
631
632 static gboolean
parse_port(const gchar * start,gsize length,gint * out,GError ** error)633 parse_port (const gchar *start,
634 gsize length,
635 gint *out,
636 GError **error)
637 {
638 gchar *end;
639 gulong parsed_port;
640
641 /* strtoul() allows leading + or -, so we have to check this first. */
642 if (!g_ascii_isdigit (*start))
643 {
644 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
645 _("Could not parse port ‘%.*s’ in URI"),
646 (gint)length, start);
647 return FALSE;
648 }
649
650 /* We know that *(start + length) is either '\0' or a non-numeric
651 * character, so strtoul() won't scan beyond it.
652 */
653 parsed_port = strtoul (start, &end, 10);
654 if (end != start + length)
655 {
656 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
657 _("Could not parse port ‘%.*s’ in URI"),
658 (gint)length, start);
659 return FALSE;
660 }
661 else if (parsed_port > 65535)
662 {
663 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
664 _("Port ‘%.*s’ in URI is out of range"),
665 (gint)length, start);
666 return FALSE;
667 }
668
669 if (out)
670 *out = parsed_port;
671 return TRUE;
672 }
673
674 static gboolean
parse_userinfo(const gchar * start,gsize length,GUriFlags flags,gchar ** user,gchar ** password,gchar ** auth_params,GError ** error)675 parse_userinfo (const gchar *start,
676 gsize length,
677 GUriFlags flags,
678 gchar **user,
679 gchar **password,
680 gchar **auth_params,
681 GError **error)
682 {
683 const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
684
685 auth_params_end = start + length;
686 if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS)
687 password_end = memchr (start, ';', auth_params_end - start);
688 if (!password_end)
689 password_end = auth_params_end;
690 if (flags & G_URI_FLAGS_HAS_PASSWORD)
691 user_end = memchr (start, ':', password_end - start);
692 if (!user_end)
693 user_end = password_end;
694
695 if (!uri_normalize (user, start, user_end - start, flags,
696 G_URI_ERROR_BAD_USER, error))
697 return FALSE;
698
699 if (*user_end == ':')
700 {
701 start = user_end + 1;
702 if (!uri_normalize (password, start, password_end - start, flags,
703 G_URI_ERROR_BAD_PASSWORD, error))
704 {
705 if (user)
706 g_clear_pointer (user, g_free);
707 return FALSE;
708 }
709 }
710 else if (password)
711 *password = NULL;
712
713 if (*password_end == ';')
714 {
715 start = password_end + 1;
716 if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
717 G_URI_ERROR_BAD_AUTH_PARAMS, error))
718 {
719 if (user)
720 g_clear_pointer (user, g_free);
721 if (password)
722 g_clear_pointer (password, g_free);
723 return FALSE;
724 }
725 }
726 else if (auth_params)
727 *auth_params = NULL;
728
729 return TRUE;
730 }
731
732 static gchar *
uri_cleanup(const gchar * uri_string)733 uri_cleanup (const gchar *uri_string)
734 {
735 GString *copy;
736 const gchar *end;
737
738 /* Skip leading whitespace */
739 while (g_ascii_isspace (*uri_string))
740 uri_string++;
741
742 /* Ignore trailing whitespace */
743 end = uri_string + strlen (uri_string);
744 while (end > uri_string && g_ascii_isspace (*(end - 1)))
745 end--;
746
747 /* Copy the rest, encoding unencoded spaces and stripping other whitespace */
748 copy = g_string_sized_new (end - uri_string);
749 while (uri_string < end)
750 {
751 if (*uri_string == ' ')
752 g_string_append (copy, "%20");
753 else if (g_ascii_isspace (*uri_string))
754 ;
755 else
756 g_string_append_c (copy, *uri_string);
757 uri_string++;
758 }
759
760 return g_string_free (copy, FALSE);
761 }
762
763 static gboolean
should_normalize_empty_path(const char * scheme)764 should_normalize_empty_path (const char *scheme)
765 {
766 const char * const schemes[] = { "https", "http", "wss", "ws" };
767 gsize i;
768 for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
769 {
770 if (!strcmp (schemes[i], scheme))
771 return TRUE;
772 }
773 return FALSE;
774 }
775
/* Returns -1 if @port is the default port for @scheme (21 for "ftp", 80 for
 * "http" and "ws", 443 for "https" and "wss"); otherwise returns @port
 * unchanged.
 */
static int
normalize_port (const char *scheme,
                int         port)
{
  int is_default;

  switch (port)
    {
    case 21:
      is_default = strcmp (scheme, "ftp") == 0;
      break;
    case 80:
      is_default = strcmp (scheme, "http") == 0 ||
                   strcmp (scheme, "ws") == 0;
      break;
    case 443:
      is_default = strcmp (scheme, "https") == 0 ||
                   strcmp (scheme, "wss") == 0;
      break;
    default:
      is_default = 0;
      break;
    }

  return is_default ? -1 : port;
}
808
/* Returns the default port for @scheme: 80 for "http" and "ws", 443 for
 * "https" and "wss", 21 for "ftp", and -1 for any other scheme.
 */
static int
default_scheme_port (const char *scheme)
{
  static const struct
  {
    const char *name;
    int port;
  } known_schemes[] = {
    { "http", 80 },
    { "ws", 80 },
    { "https", 443 },
    { "wss", 443 },
    { "ftp", 21 },
  };
  size_t i;

  for (i = 0; i < sizeof known_schemes / sizeof known_schemes[0]; i++)
    {
      if (strcmp (scheme, known_schemes[i].name) == 0)
        return known_schemes[i].port;
    }

  return -1;
}
823
824 static gboolean
g_uri_split_internal(const gchar * uri_string,GUriFlags flags,gchar ** scheme,gchar ** userinfo,gchar ** user,gchar ** password,gchar ** auth_params,gchar ** host,gint * port,gchar ** path,gchar ** query,gchar ** fragment,GError ** error)825 g_uri_split_internal (const gchar *uri_string,
826 GUriFlags flags,
827 gchar **scheme,
828 gchar **userinfo,
829 gchar **user,
830 gchar **password,
831 gchar **auth_params,
832 gchar **host,
833 gint *port,
834 gchar **path,
835 gchar **query,
836 gchar **fragment,
837 GError **error)
838 {
839 const gchar *end, *colon, *at, *path_start, *semi, *question;
840 const gchar *p, *bracket, *hostend;
841 gchar *cleaned_uri_string = NULL;
842 gchar *normalized_scheme = NULL;
843
844 if (scheme)
845 *scheme = NULL;
846 if (userinfo)
847 *userinfo = NULL;
848 if (user)
849 *user = NULL;
850 if (password)
851 *password = NULL;
852 if (auth_params)
853 *auth_params = NULL;
854 if (host)
855 *host = NULL;
856 if (port)
857 *port = -1;
858 if (path)
859 *path = NULL;
860 if (query)
861 *query = NULL;
862 if (fragment)
863 *fragment = NULL;
864
865 if ((flags & G_URI_FLAGS_PARSE_RELAXED) && strpbrk (uri_string, " \t\n\r"))
866 {
867 cleaned_uri_string = uri_cleanup (uri_string);
868 uri_string = cleaned_uri_string;
869 }
870
871 /* Find scheme */
872 p = uri_string;
873 while (*p && (g_ascii_isalpha (*p) ||
874 (p > uri_string && (g_ascii_isdigit (*p) ||
875 *p == '.' || *p == '+' || *p == '-'))))
876 p++;
877
878 if (p > uri_string && *p == ':')
879 {
880 normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
881 if (scheme)
882 *scheme = g_steal_pointer (&normalized_scheme);
883 p++;
884 }
885 else
886 {
887 if (scheme)
888 *scheme = NULL;
889 p = uri_string;
890 }
891
892 /* Check for authority */
893 if (strncmp (p, "//", 2) == 0)
894 {
895 p += 2;
896
897 path_start = p + strcspn (p, "/?#");
898 at = memchr (p, '@', path_start - p);
899 if (at)
900 {
901 if (flags & G_URI_FLAGS_PARSE_RELAXED)
902 {
903 gchar *next_at;
904
905 /* Any "@"s in the userinfo must be %-encoded, but
906 * people get this wrong sometimes. Since "@"s in the
907 * hostname are unlikely (and also wrong anyway), assume
908 * that if there are extra "@"s, they belong in the
909 * userinfo.
910 */
911 do
912 {
913 next_at = memchr (at + 1, '@', path_start - (at + 1));
914 if (next_at)
915 at = next_at;
916 }
917 while (next_at);
918 }
919
920 if (user || password || auth_params ||
921 (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS)))
922 {
923 if (!parse_userinfo (p, at - p, flags,
924 user, password, auth_params,
925 error))
926 goto fail;
927 }
928
929 if (!uri_normalize (userinfo, p, at - p, flags,
930 G_URI_ERROR_BAD_USER, error))
931 goto fail;
932
933 p = at + 1;
934 }
935
936 if (flags & G_URI_FLAGS_PARSE_RELAXED)
937 {
938 semi = strchr (p, ';');
939 if (semi && semi < path_start)
940 {
941 /* Technically, semicolons are allowed in the "host"
942 * production, but no one ever does this, and some
943 * schemes mistakenly use semicolon as a delimiter
944 * marking the start of the path. We have to check this
945 * after checking for userinfo though, because a
946 * semicolon before the "@" must be part of the
947 * userinfo.
948 */
949 path_start = semi;
950 }
951 }
952
953 /* Find host and port. The host may be a bracket-delimited IPv6
954 * address, in which case the colon delimiting the port must come
955 * (immediately) after the close bracket.
956 */
957 if (*p == '[')
958 {
959 bracket = memchr (p, ']', path_start - p);
960 if (bracket && *(bracket + 1) == ':')
961 colon = bracket + 1;
962 else
963 colon = NULL;
964 }
965 else
966 colon = memchr (p, ':', path_start - p);
967
968 hostend = colon ? colon : path_start;
969 if (!parse_host (p, hostend - p, flags, host, error))
970 goto fail;
971
972 if (colon && colon != path_start - 1)
973 {
974 p = colon + 1;
975 if (!parse_port (p, path_start - p, port, error))
976 goto fail;
977 }
978
979 p = path_start;
980 }
981
982 /* Find fragment. */
983 end = p + strcspn (p, "#");
984 if (*end == '#')
985 {
986 if (!uri_normalize (fragment, end + 1, strlen (end + 1),
987 flags | (flags & G_URI_FLAGS_ENCODED_FRAGMENT ? G_URI_FLAGS_ENCODED : 0),
988 G_URI_ERROR_BAD_FRAGMENT, error))
989 goto fail;
990 }
991
992 /* Find query */
993 question = memchr (p, '?', end - p);
994 if (question)
995 {
996 if (!uri_normalize (query, question + 1, end - (question + 1),
997 flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
998 G_URI_ERROR_BAD_QUERY, error))
999 goto fail;
1000 end = question;
1001 }
1002
1003 if (!uri_normalize (path, p, end - p,
1004 flags | (flags & G_URI_FLAGS_ENCODED_PATH ? G_URI_FLAGS_ENCODED : 0),
1005 G_URI_ERROR_BAD_PATH, error))
1006 goto fail;
1007
1008 /* Scheme-based normalization */
1009 if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
1010 {
1011 const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
1012
1013 if (should_normalize_empty_path (scheme_str) && path && !**path)
1014 {
1015 g_free (*path);
1016 *path = g_strdup ("/");
1017 }
1018
1019 if (port && *port == -1)
1020 *port = default_scheme_port (scheme_str);
1021 }
1022
1023 g_free (normalized_scheme);
1024 g_free (cleaned_uri_string);
1025 return TRUE;
1026
1027 fail:
1028 if (scheme)
1029 g_clear_pointer (scheme, g_free);
1030 if (userinfo)
1031 g_clear_pointer (userinfo, g_free);
1032 if (host)
1033 g_clear_pointer (host, g_free);
1034 if (port)
1035 *port = -1;
1036 if (path)
1037 g_clear_pointer (path, g_free);
1038 if (query)
1039 g_clear_pointer (query, g_free);
1040 if (fragment)
1041 g_clear_pointer (fragment, g_free);
1042
1043 g_free (normalized_scheme);
1044 g_free (cleaned_uri_string);
1045 return FALSE;
1046 }
1047
1048 /**
1049 * g_uri_split:
1050 * @uri_ref: a string containing a relative or absolute URI
1051 * @flags: flags for parsing @uri_ref
1052 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1053 * the scheme (converted to lowercase), or %NULL
1054 * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains
1055 * the userinfo, or %NULL
1056 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1057 * host, or %NULL
1058 * @port: (out) (optional) (transfer full): on return, contains the
1059 * port, or `-1`
1060 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1061 * path
1062 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1063 * query, or %NULL
1064 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1065 * the fragment, or %NULL
1066 * @error: #GError for error reporting, or %NULL to ignore.
1067 *
1068 * Parses @uri_ref (which can be an
1069 * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1070 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1071 * returned as %NULL (but note that all URIs always have a path component,
1072 * though it may be the empty string).
1073 *
1074 * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in
1075 * @uri_ref will remain encoded in the output strings. (If not,
1076 * then all such characters will be decoded.) Note that decoding will
1077 * only work if the URI components are ASCII or UTF-8, so you will
1078 * need to use %G_URI_FLAGS_ENCODED if they are not.
1079 *
1080 * Note that the %G_URI_FLAGS_HAS_PASSWORD and
1081 * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(),
1082 * since it always returns only the full userinfo; use
1083 * g_uri_split_with_user() if you want it split up.
1084 *
1085 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1086 * on error.
1087 *
1088 * Since: 2.66
1089 */
1090 gboolean
g_uri_split(const gchar * uri_ref,GUriFlags flags,gchar ** scheme,gchar ** userinfo,gchar ** host,gint * port,gchar ** path,gchar ** query,gchar ** fragment,GError ** error)1091 g_uri_split (const gchar *uri_ref,
1092 GUriFlags flags,
1093 gchar **scheme,
1094 gchar **userinfo,
1095 gchar **host,
1096 gint *port,
1097 gchar **path,
1098 gchar **query,
1099 gchar **fragment,
1100 GError **error)
1101 {
1102 g_return_val_if_fail (uri_ref != NULL, FALSE);
1103 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1104
1105 return g_uri_split_internal (uri_ref, flags,
1106 scheme, userinfo, NULL, NULL, NULL,
1107 host, port, path, query, fragment,
1108 error);
1109 }
1110
1111 /**
1112 * g_uri_split_with_user:
1113 * @uri_ref: a string containing a relative or absolute URI
1114 * @flags: flags for parsing @uri_ref
1115 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1116 * the scheme (converted to lowercase), or %NULL
1117 * @user: (out) (nullable) (optional) (transfer full): on return, contains
1118 * the user, or %NULL
1119 * @password: (out) (nullable) (optional) (transfer full): on return, contains
1120 * the password, or %NULL
1121 * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains
1122 * the auth_params, or %NULL
1123 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1124 * host, or %NULL
1125 * @port: (out) (optional) (transfer full): on return, contains the
1126 * port, or `-1`
1127 * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1128 * path
1129 * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1130 * query, or %NULL
1131 * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1132 * the fragment, or %NULL
1133 * @error: #GError for error reporting, or %NULL to ignore.
1134 *
1135 * Parses @uri_ref (which can be an
1136 * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1137 * returns the pieces. Any component that doesn't appear in @uri_ref will be
1138 * returned as %NULL (but note that all URIs always have a path component,
1139 * though it may be the empty string).
1140 *
1141 * See g_uri_split(), and the definition of #GUriFlags, for more
1142 * information on the effect of @flags. Note that @password will only
1143 * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and
1144 * @auth_params will only be parsed out if @flags contains
1145 * %G_URI_FLAGS_HAS_AUTH_PARAMS.
1146 *
1147 * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1148 * on error.
1149 *
1150 * Since: 2.66
1151 */
1152 gboolean
g_uri_split_with_user(const gchar * uri_ref,GUriFlags flags,gchar ** scheme,gchar ** user,gchar ** password,gchar ** auth_params,gchar ** host,gint * port,gchar ** path,gchar ** query,gchar ** fragment,GError ** error)1153 g_uri_split_with_user (const gchar *uri_ref,
1154 GUriFlags flags,
1155 gchar **scheme,
1156 gchar **user,
1157 gchar **password,
1158 gchar **auth_params,
1159 gchar **host,
1160 gint *port,
1161 gchar **path,
1162 gchar **query,
1163 gchar **fragment,
1164 GError **error)
1165 {
1166 g_return_val_if_fail (uri_ref != NULL, FALSE);
1167 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1168
1169 return g_uri_split_internal (uri_ref, flags,
1170 scheme, NULL, user, password, auth_params,
1171 host, port, path, query, fragment,
1172 error);
1173 }
1174
1175
1176 /**
1177 * g_uri_split_network:
1178 * @uri_string: a string containing an absolute URI
1179 * @flags: flags for parsing @uri_string
1180 * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1181 * the scheme (converted to lowercase), or %NULL
1182 * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1183 * host, or %NULL
1184 * @port: (out) (optional) (transfer full): on return, contains the
1185 * port, or `-1`
1186 * @error: #GError for error reporting, or %NULL to ignore.
1187 *
1188 * Parses @uri_string (which must be an [absolute URI][relative-absolute-uris])
1189 * according to @flags, and returns the pieces relevant to connecting to a host.
1190 * See the documentation for g_uri_split() for more details; this is
1191 * mostly a wrapper around that function with simpler arguments.
1192 * However, it will return an error if @uri_string is a relative URI,
1193 * or does not contain a hostname component.
1194 *
1195 * Returns: (skip): %TRUE if @uri_string parsed successfully,
1196 * %FALSE on error.
1197 *
1198 * Since: 2.66
1199 */
1200 gboolean
g_uri_split_network(const gchar * uri_string,GUriFlags flags,gchar ** scheme,gchar ** host,gint * port,GError ** error)1201 g_uri_split_network (const gchar *uri_string,
1202 GUriFlags flags,
1203 gchar **scheme,
1204 gchar **host,
1205 gint *port,
1206 GError **error)
1207 {
1208 gchar *my_scheme = NULL, *my_host = NULL;
1209
1210 g_return_val_if_fail (uri_string != NULL, FALSE);
1211 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1212
1213 if (!g_uri_split_internal (uri_string, flags,
1214 &my_scheme, NULL, NULL, NULL, NULL,
1215 &my_host, port, NULL, NULL, NULL,
1216 error))
1217 return FALSE;
1218
1219 if (!my_scheme || !my_host)
1220 {
1221 if (!my_scheme)
1222 {
1223 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1224 _("URI ‘%s’ is not an absolute URI"),
1225 uri_string);
1226 }
1227 else
1228 {
1229 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
1230 _("URI ‘%s’ has no host component"),
1231 uri_string);
1232 }
1233 g_free (my_scheme);
1234 g_free (my_host);
1235
1236 return FALSE;
1237 }
1238
1239 if (scheme)
1240 *scheme = g_steal_pointer (&my_scheme);
1241 if (host)
1242 *host = g_steal_pointer (&my_host);
1243
1244 g_free (my_scheme);
1245 g_free (my_host);
1246
1247 return TRUE;
1248 }
1249
1250 /**
1251 * g_uri_is_valid:
1252 * @uri_string: a string containing an absolute URI
1253 * @flags: flags for parsing @uri_string
1254 * @error: #GError for error reporting, or %NULL to ignore.
1255 *
1256 * Parses @uri_string according to @flags, to determine whether it is a valid
1257 * [absolute URI][relative-absolute-uris], i.e. it does not need to be resolved
1258 * relative to another URI using g_uri_parse_relative().
1259 *
1260 * If it’s not a valid URI, an error is returned explaining how it’s invalid.
1261 *
1262 * See g_uri_split(), and the definition of #GUriFlags, for more
1263 * information on the effect of @flags.
1264 *
1265 * Returns: %TRUE if @uri_string is a valid absolute URI, %FALSE on error.
1266 *
1267 * Since: 2.66
1268 */
1269 gboolean
g_uri_is_valid(const gchar * uri_string,GUriFlags flags,GError ** error)1270 g_uri_is_valid (const gchar *uri_string,
1271 GUriFlags flags,
1272 GError **error)
1273 {
1274 gchar *my_scheme = NULL;
1275
1276 g_return_val_if_fail (uri_string != NULL, FALSE);
1277 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1278
1279 if (!g_uri_split_internal (uri_string, flags,
1280 &my_scheme, NULL, NULL, NULL, NULL,
1281 NULL, NULL, NULL, NULL, NULL,
1282 error))
1283 return FALSE;
1284
1285 if (!my_scheme)
1286 {
1287 g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1288 _("URI ‘%s’ is not an absolute URI"),
1289 uri_string);
1290 return FALSE;
1291 }
1292
1293 g_free (my_scheme);
1294
1295 return TRUE;
1296 }
1297
1298
/* Moves the write position @cursor back over the last segment written
 * to the buffer starting at @start, stopping on that segment's leading
 * "/" or at @start itself. Returns the new write position; @cursor is
 * returned unchanged when nothing has been written yet.
 */
static gchar *
remove_dot_segments_rewind (gchar *start,
                            gchar *cursor)
{
  if (cursor > start)
    {
      do
        cursor--;
      while (*cursor != '/' && cursor > start);
    }

  return cursor;
}

/* In-place implementation of the "Remove Dot Segments" algorithm from
 * section 5.2.4 of RFC 3986; @path is rewritten with its "." and ".."
 * segments resolved.
 *
 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
 */
static void
remove_dot_segments (gchar *path)
{
  /* Reading and writing share one buffer. This is safe because the
   * write position only advances together with the read position, and
   * the read position never moves backwards.
   */
  gchar *src = path;
  gchar *dst = path;

  if (*path == '\0')
    return;

  while (*src != '\0')
    {
      /* A. A leading "../" or "./" is dropped from the input. */
      if (strncmp (src, "../", 3) == 0)
        {
          src += 3;
          continue;
        }
      if (strncmp (src, "./", 2) == 0)
        {
          src += 2;
          continue;
        }

      /* B. A leading "/./", or a "/." that is the whole remaining input,
       *    collapses to "/".
       */
      if (strncmp (src, "/./", 3) == 0)
        {
          src += 2;
          continue;
        }
      if (strcmp (src, "/.") == 0)
        {
          src[1] = '\0';
          continue;
        }

      /* C. A leading "/../", or a "/.." that is the whole remaining
       *    input, collapses to "/" and also discards the last segment
       *    already written (with its preceding "/", if any).
       */
      if (strncmp (src, "/../", 4) == 0)
        {
          src += 3;
          dst = remove_dot_segments_rewind (path, dst);
          continue;
        }
      if (strcmp (src, "/..") == 0)
        {
          src[1] = '\0';
          dst = remove_dot_segments_rewind (path, dst);
          continue;
        }

      /* D. Input consisting solely of "." or ".." is discarded. */
      if (strcmp (src, "..") == 0 || strcmp (src, ".") == 0)
        {
          src[0] = '\0';
          continue;
        }

      /* E. Otherwise copy one segment across: its first character (the
       *    leading "/" when there is one) plus everything up to, but not
       *    including, the next "/" or the end of the input.
       */
      do
        *dst++ = *src++;
      while (*src != '\0' && *src != '/');
    }

  *dst = '\0';
}
1387
1388 /**
1389 * g_uri_parse:
1390 * @uri_string: a string representing an absolute URI
1391 * @flags: flags describing how to parse @uri_string
1392 * @error: #GError for error reporting, or %NULL to ignore.
1393 *
1394 * Parses @uri_string according to @flags. If the result is not a
1395 * valid [absolute URI][relative-absolute-uris], it will be discarded, and an
1396 * error returned.
1397 *
1398 * Return value: (transfer full): a new #GUri, or NULL on error.
1399 *
1400 * Since: 2.66
1401 */
1402 GUri *
g_uri_parse(const gchar * uri_string,GUriFlags flags,GError ** error)1403 g_uri_parse (const gchar *uri_string,
1404 GUriFlags flags,
1405 GError **error)
1406 {
1407 g_return_val_if_fail (uri_string != NULL, NULL);
1408 g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1409
1410 return g_uri_parse_relative (NULL, uri_string, flags, error);
1411 }
1412
1413 /**
1414 * g_uri_parse_relative:
1415 * @base_uri: (nullable) (transfer none): a base absolute URI
1416 * @uri_ref: a string representing a relative or absolute URI
1417 * @flags: flags describing how to parse @uri_ref
1418 * @error: #GError for error reporting, or %NULL to ignore.
1419 *
1420 * Parses @uri_ref according to @flags and, if it is a
1421 * [relative URI][relative-absolute-uris], resolves it relative to @base_uri.
1422 * If the result is not a valid absolute URI, it will be discarded, and an error
1423 * returned.
1424 *
1425 * Return value: (transfer full): a new #GUri, or NULL on error.
1426 *
1427 * Since: 2.66
1428 */
1429 GUri *
g_uri_parse_relative(GUri * base_uri,const gchar * uri_ref,GUriFlags flags,GError ** error)1430 g_uri_parse_relative (GUri *base_uri,
1431 const gchar *uri_ref,
1432 GUriFlags flags,
1433 GError **error)
1434 {
1435 GUri *uri = NULL;
1436
1437 g_return_val_if_fail (uri_ref != NULL, NULL);
1438 g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1439 g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
1440
1441 /* Use GUri struct to construct the return value: there is no guarantee it is
1442 * actually correct within the function body. */
1443 uri = g_atomic_rc_box_new0 (GUri);
1444 uri->flags = flags;
1445
1446 if (!g_uri_split_internal (uri_ref, flags,
1447 &uri->scheme, &uri->userinfo,
1448 &uri->user, &uri->password, &uri->auth_params,
1449 &uri->host, &uri->port,
1450 &uri->path, &uri->query, &uri->fragment,
1451 error))
1452 {
1453 g_uri_unref (uri);
1454 return NULL;
1455 }
1456
1457 if (!uri->scheme && !base_uri)
1458 {
1459 g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
1460 _("URI is not absolute, and no base URI was provided"));
1461 g_uri_unref (uri);
1462 return NULL;
1463 }
1464
1465 if (base_uri)
1466 {
1467 /* This is section 5.2.2 of RFC 3986, except that we're doing
1468 * it in place in @uri rather than copying from R to T.
1469 *
1470 * See https://tools.ietf.org/html/rfc3986#section-5.2.2
1471 */
1472 if (uri->scheme)
1473 remove_dot_segments (uri->path);
1474 else
1475 {
1476 uri->scheme = g_strdup (base_uri->scheme);
1477 if (uri->host)
1478 remove_dot_segments (uri->path);
1479 else
1480 {
1481 if (!*uri->path)
1482 {
1483 g_free (uri->path);
1484 uri->path = g_strdup (base_uri->path);
1485 if (!uri->query)
1486 uri->query = g_strdup (base_uri->query);
1487 }
1488 else
1489 {
1490 if (*uri->path == '/')
1491 remove_dot_segments (uri->path);
1492 else
1493 {
1494 gchar *newpath, *last;
1495
1496 last = strrchr (base_uri->path, '/');
1497 if (last)
1498 {
1499 newpath = g_strdup_printf ("%.*s/%s",
1500 (gint)(last - base_uri->path),
1501 base_uri->path,
1502 uri->path);
1503 }
1504 else
1505 newpath = g_strdup_printf ("/%s", uri->path);
1506
1507 g_free (uri->path);
1508 uri->path = g_steal_pointer (&newpath);
1509
1510 remove_dot_segments (uri->path);
1511 }
1512 }
1513
1514 uri->userinfo = g_strdup (base_uri->userinfo);
1515 uri->user = g_strdup (base_uri->user);
1516 uri->password = g_strdup (base_uri->password);
1517 uri->auth_params = g_strdup (base_uri->auth_params);
1518 uri->host = g_strdup (base_uri->host);
1519 uri->port = base_uri->port;
1520 }
1521 }
1522
1523 /* Scheme normalization couldn't have been done earlier
1524 * as the relative URI may not have had a scheme */
1525 if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
1526 {
1527 if (should_normalize_empty_path (uri->scheme) && !*uri->path)
1528 {
1529 g_free (uri->path);
1530 uri->path = g_strdup ("/");
1531 }
1532
1533 uri->port = normalize_port (uri->scheme, uri->port);
1534 }
1535 }
1536 else
1537 {
1538 remove_dot_segments (uri->path);
1539 }
1540
1541 return g_steal_pointer (&uri);
1542 }
1543
1544 /**
1545 * g_uri_resolve_relative:
1546 * @base_uri_string: (nullable): a string representing a base URI
1547 * @uri_ref: a string representing a relative or absolute URI
1548 * @flags: flags describing how to parse @uri_ref
1549 * @error: #GError for error reporting, or %NULL to ignore.
1550 *
1551 * Parses @uri_ref according to @flags and, if it is a
1552 * [relative URI][relative-absolute-uris], resolves it relative to
1553 * @base_uri_string. If the result is not a valid absolute URI, it will be
1554 * discarded, and an error returned.
1555 *
1556 * (If @base_uri_string is %NULL, this just returns @uri_ref, or
1557 * %NULL if @uri_ref is invalid or not absolute.)
1558 *
1559 * Return value: (transfer full): the resolved URI string,
1560 * or NULL on error.
1561 *
1562 * Since: 2.66
1563 */
1564 gchar *
g_uri_resolve_relative(const gchar * base_uri_string,const gchar * uri_ref,GUriFlags flags,GError ** error)1565 g_uri_resolve_relative (const gchar *base_uri_string,
1566 const gchar *uri_ref,
1567 GUriFlags flags,
1568 GError **error)
1569 {
1570 GUri *base_uri, *resolved_uri;
1571 gchar *resolved_uri_string;
1572
1573 g_return_val_if_fail (uri_ref != NULL, NULL);
1574 g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1575
1576 flags |= G_URI_FLAGS_ENCODED;
1577
1578 if (base_uri_string)
1579 {
1580 base_uri = g_uri_parse (base_uri_string, flags, error);
1581 if (!base_uri)
1582 return NULL;
1583 }
1584 else
1585 base_uri = NULL;
1586
1587 resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error);
1588 if (base_uri)
1589 g_uri_unref (base_uri);
1590 if (!resolved_uri)
1591 return NULL;
1592
1593 resolved_uri_string = g_uri_to_string (resolved_uri);
1594 g_uri_unref (resolved_uri);
1595 return g_steal_pointer (&resolved_uri_string);
1596 }
1597
/* Reserved characters that are left unescaped in each URI component
 * when it is escaped for joining.
 *
 * The userinfo taken as a whole can contain sub-delims + ":". Once it is
 * split up, though, the user part can contain neither ":" nor ";", and
 * the password part cannot contain ";" (so it is the user set plus ":").
 */
#define USERINFO_ALLOWED_CHARS    G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
#define USER_ALLOWED_CHARS        "!$&'()*+,="
#define PASSWORD_ALLOWED_CHARS    USER_ALLOWED_CHARS ":"
#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
#define IP_ADDR_ALLOWED_CHARS     ":"
#define HOST_ALLOWED_CHARS        G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
#define PATH_ALLOWED_CHARS        G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
#define QUERY_ALLOWED_CHARS       G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
#define FRAGMENT_ALLOWED_CHARS    G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1611
1612 static gchar *
g_uri_join_internal(GUriFlags flags,const gchar * scheme,gboolean userinfo,const gchar * user,const gchar * password,const gchar * auth_params,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1613 g_uri_join_internal (GUriFlags flags,
1614 const gchar *scheme,
1615 gboolean userinfo,
1616 const gchar *user,
1617 const gchar *password,
1618 const gchar *auth_params,
1619 const gchar *host,
1620 gint port,
1621 const gchar *path,
1622 const gchar *query,
1623 const gchar *fragment)
1624 {
1625 gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
1626 GString *str;
1627 char *normalized_scheme = NULL;
1628
1629 /* Restrictions on path prefixes. See:
1630 * https://tools.ietf.org/html/rfc3986#section-3
1631 */
1632 g_return_val_if_fail (path != NULL, NULL);
1633 g_return_val_if_fail (host == NULL || (path[0] == '\0' || path[0] == '/'), NULL);
1634 g_return_val_if_fail (host != NULL || (path[0] != '/' || path[1] != '/'), NULL);
1635
1636 str = g_string_new (scheme);
1637 if (scheme)
1638 g_string_append_c (str, ':');
1639
1640 if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
1641 normalized_scheme = g_ascii_strdown (scheme, -1);
1642
1643 if (host)
1644 {
1645 g_string_append (str, "//");
1646
1647 if (user)
1648 {
1649 if (encoded)
1650 g_string_append (str, user);
1651 else
1652 {
1653 if (userinfo)
1654 g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
1655 else
1656 /* Encode ':' and ';' regardless of whether we have a
1657 * password or auth params, since it may be parsed later
1658 * under the assumption that it does.
1659 */
1660 g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
1661 }
1662
1663 if (password)
1664 {
1665 g_string_append_c (str, ':');
1666 if (encoded)
1667 g_string_append (str, password);
1668 else
1669 g_string_append_uri_escaped (str, password,
1670 PASSWORD_ALLOWED_CHARS, TRUE);
1671 }
1672
1673 if (auth_params)
1674 {
1675 g_string_append_c (str, ';');
1676 if (encoded)
1677 g_string_append (str, auth_params);
1678 else
1679 g_string_append_uri_escaped (str, auth_params,
1680 AUTH_PARAMS_ALLOWED_CHARS, TRUE);
1681 }
1682
1683 g_string_append_c (str, '@');
1684 }
1685
1686 if (strchr (host, ':') && g_hostname_is_ip_address (host))
1687 {
1688 g_string_append_c (str, '[');
1689 if (encoded)
1690 g_string_append (str, host);
1691 else
1692 g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
1693 g_string_append_c (str, ']');
1694 }
1695 else
1696 {
1697 if (encoded)
1698 g_string_append (str, host);
1699 else
1700 g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
1701 }
1702
1703 if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
1704 g_string_append_printf (str, ":%d", port);
1705 }
1706
1707 if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
1708 g_string_append (str, "/");
1709 else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
1710 g_string_append (str, path);
1711 else
1712 g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
1713
1714 g_free (normalized_scheme);
1715
1716 if (query)
1717 {
1718 g_string_append_c (str, '?');
1719 if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
1720 g_string_append (str, query);
1721 else
1722 g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
1723 }
1724 if (fragment)
1725 {
1726 g_string_append_c (str, '#');
1727 if (encoded || flags & G_URI_FLAGS_ENCODED_FRAGMENT)
1728 g_string_append (str, fragment);
1729 else
1730 g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
1731 }
1732
1733 return g_string_free (str, FALSE);
1734 }
1735
1736 /**
1737 * g_uri_join:
1738 * @flags: flags describing how to build the URI string
1739 * @scheme: (nullable): the URI scheme, or %NULL
1740 * @userinfo: (nullable): the userinfo component, or %NULL
1741 * @host: (nullable): the host component, or %NULL
1742 * @port: the port, or `-1`
1743 * @path: (not nullable): the path component
1744 * @query: (nullable): the query component, or %NULL
1745 * @fragment: (nullable): the fragment, or %NULL
1746 *
1747 * Joins the given components together according to @flags to create
1748 * an absolute URI string. @path may not be %NULL (though it may be the empty
1749 * string).
1750 *
1751 * When @host is present, @path must either be empty or begin with a slash (`/`)
1752 * character. When @host is not present, @path cannot begin with two slash
 * characters (`//`). See
1754 * [RFC 3986, section 3](https://tools.ietf.org/html/rfc3986#section-3).
1755 *
1756 * See also g_uri_join_with_user(), which allows specifying the
1757 * components of the ‘userinfo’ separately.
1758 *
1759 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1760 * in @flags.
1761 *
1762 * Return value: (not nullable) (transfer full): an absolute URI string
1763 *
1764 * Since: 2.66
1765 */
1766 gchar *
g_uri_join(GUriFlags flags,const gchar * scheme,const gchar * userinfo,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1767 g_uri_join (GUriFlags flags,
1768 const gchar *scheme,
1769 const gchar *userinfo,
1770 const gchar *host,
1771 gint port,
1772 const gchar *path,
1773 const gchar *query,
1774 const gchar *fragment)
1775 {
1776 g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1777 g_return_val_if_fail (path != NULL, NULL);
1778
1779 return g_uri_join_internal (flags,
1780 scheme,
1781 TRUE, userinfo, NULL, NULL,
1782 host,
1783 port,
1784 path,
1785 query,
1786 fragment);
1787 }
1788
1789 /**
1790 * g_uri_join_with_user:
1791 * @flags: flags describing how to build the URI string
1792 * @scheme: (nullable): the URI scheme, or %NULL
1793 * @user: (nullable): the user component of the userinfo, or %NULL
1794 * @password: (nullable): the password component of the userinfo, or
1795 * %NULL
1796 * @auth_params: (nullable): the auth params of the userinfo, or
1797 * %NULL
1798 * @host: (nullable): the host component, or %NULL
1799 * @port: the port, or `-1`
1800 * @path: (not nullable): the path component
1801 * @query: (nullable): the query component, or %NULL
1802 * @fragment: (nullable): the fragment, or %NULL
1803 *
1804 * Joins the given components together according to @flags to create
1805 * an absolute URI string. @path may not be %NULL (though it may be the empty
1806 * string).
1807 *
1808 * In contrast to g_uri_join(), this allows specifying the components
1809 * of the ‘userinfo’ separately. It otherwise behaves the same.
1810 *
1811 * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1812 * in @flags.
1813 *
1814 * Return value: (not nullable) (transfer full): an absolute URI string
1815 *
1816 * Since: 2.66
1817 */
1818 gchar *
g_uri_join_with_user(GUriFlags flags,const gchar * scheme,const gchar * user,const gchar * password,const gchar * auth_params,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1819 g_uri_join_with_user (GUriFlags flags,
1820 const gchar *scheme,
1821 const gchar *user,
1822 const gchar *password,
1823 const gchar *auth_params,
1824 const gchar *host,
1825 gint port,
1826 const gchar *path,
1827 const gchar *query,
1828 const gchar *fragment)
1829 {
1830 g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1831 g_return_val_if_fail (path != NULL, NULL);
1832
1833 return g_uri_join_internal (flags,
1834 scheme,
1835 FALSE, user, password, auth_params,
1836 host,
1837 port,
1838 path,
1839 query,
1840 fragment);
1841 }
1842
1843 /**
1844 * g_uri_build:
1845 * @flags: flags describing how to build the #GUri
1846 * @scheme: (not nullable): the URI scheme
1847 * @userinfo: (nullable): the userinfo component, or %NULL
1848 * @host: (nullable): the host component, or %NULL
1849 * @port: the port, or `-1`
1850 * @path: (not nullable): the path component
1851 * @query: (nullable): the query component, or %NULL
1852 * @fragment: (nullable): the fragment, or %NULL
1853 *
1854 * Creates a new #GUri from the given components according to @flags.
1855 *
1856 * See also g_uri_build_with_user(), which allows specifying the
1857 * components of the "userinfo" separately.
1858 *
1859 * Return value: (not nullable) (transfer full): a new #GUri
1860 *
1861 * Since: 2.66
1862 */
1863 GUri *
g_uri_build(GUriFlags flags,const gchar * scheme,const gchar * userinfo,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1864 g_uri_build (GUriFlags flags,
1865 const gchar *scheme,
1866 const gchar *userinfo,
1867 const gchar *host,
1868 gint port,
1869 const gchar *path,
1870 const gchar *query,
1871 const gchar *fragment)
1872 {
1873 GUri *uri;
1874
1875 g_return_val_if_fail (scheme != NULL, NULL);
1876 g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1877 g_return_val_if_fail (path != NULL, NULL);
1878
1879 uri = g_atomic_rc_box_new0 (GUri);
1880 uri->flags = flags;
1881 uri->scheme = g_ascii_strdown (scheme, -1);
1882 uri->userinfo = g_strdup (userinfo);
1883 uri->host = g_strdup (host);
1884 uri->port = port;
1885 uri->path = g_strdup (path);
1886 uri->query = g_strdup (query);
1887 uri->fragment = g_strdup (fragment);
1888
1889 return g_steal_pointer (&uri);
1890 }
1891
1892 /**
1893 * g_uri_build_with_user:
1894 * @flags: flags describing how to build the #GUri
1895 * @scheme: (not nullable): the URI scheme
1896 * @user: (nullable): the user component of the userinfo, or %NULL
1897 * @password: (nullable): the password component of the userinfo, or %NULL
1898 * @auth_params: (nullable): the auth params of the userinfo, or %NULL
1899 * @host: (nullable): the host component, or %NULL
1900 * @port: the port, or `-1`
1901 * @path: (not nullable): the path component
1902 * @query: (nullable): the query component, or %NULL
1903 * @fragment: (nullable): the fragment, or %NULL
1904 *
1905 * Creates a new #GUri from the given components according to @flags
1906 * (%G_URI_FLAGS_HAS_PASSWORD is added unconditionally). The @flags must be
1907 * coherent with the passed values, in particular use `%`-encoded values with
1908 * %G_URI_FLAGS_ENCODED.
1909 *
1910 * In contrast to g_uri_build(), this allows specifying the components
1911 * of the ‘userinfo’ field separately. Note that @user must be non-%NULL
1912 * if either @password or @auth_params is non-%NULL.
1913 *
1914 * Return value: (not nullable) (transfer full): a new #GUri
1915 *
1916 * Since: 2.66
1917 */
1918 GUri *
g_uri_build_with_user(GUriFlags flags,const gchar * scheme,const gchar * user,const gchar * password,const gchar * auth_params,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1919 g_uri_build_with_user (GUriFlags flags,
1920 const gchar *scheme,
1921 const gchar *user,
1922 const gchar *password,
1923 const gchar *auth_params,
1924 const gchar *host,
1925 gint port,
1926 const gchar *path,
1927 const gchar *query,
1928 const gchar *fragment)
1929 {
1930 GUri *uri;
1931 GString *userinfo;
1932
1933 g_return_val_if_fail (scheme != NULL, NULL);
1934 g_return_val_if_fail (password == NULL || user != NULL, NULL);
1935 g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
1936 g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1937 g_return_val_if_fail (path != NULL, NULL);
1938
1939 uri = g_atomic_rc_box_new0 (GUri);
1940 uri->flags = flags | G_URI_FLAGS_HAS_PASSWORD;
1941 uri->scheme = g_ascii_strdown (scheme, -1);
1942 uri->user = g_strdup (user);
1943 uri->password = g_strdup (password);
1944 uri->auth_params = g_strdup (auth_params);
1945 uri->host = g_strdup (host);
1946 uri->port = port;
1947 uri->path = g_strdup (path);
1948 uri->query = g_strdup (query);
1949 uri->fragment = g_strdup (fragment);
1950
1951 if (user)
1952 {
1953 userinfo = g_string_new (user);
1954 if (password)
1955 {
1956 g_string_append_c (userinfo, ':');
1957 g_string_append (userinfo, uri->password);
1958 }
1959 if (auth_params)
1960 {
1961 g_string_append_c (userinfo, ';');
1962 g_string_append (userinfo, uri->auth_params);
1963 }
1964 uri->userinfo = g_string_free (userinfo, FALSE);
1965 }
1966
1967 return g_steal_pointer (&uri);
1968 }
1969
1970 /**
1971 * g_uri_to_string:
1972 * @uri: a #GUri
1973 *
1974 * Returns a string representing @uri.
1975 *
1976 * This is not guaranteed to return a string which is identical to the
1977 * string that @uri was parsed from. However, if the source URI was
1978 * syntactically correct (according to RFC 3986), and it was parsed
1979 * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
1980 * a string which is at least semantically equivalent to the source
1981 * URI (according to RFC 3986).
1982 *
1983 * If @uri might contain sensitive details, such as authentication parameters,
1984 * or private data in its query string, and the returned string is going to be
1985 * logged, then consider using g_uri_to_string_partial() to redact parts.
1986 *
1987 * Return value: (not nullable) (transfer full): a string representing @uri,
1988 * which the caller must free.
1989 *
1990 * Since: 2.66
1991 */
1992 gchar *
g_uri_to_string(GUri * uri)1993 g_uri_to_string (GUri *uri)
1994 {
1995 g_return_val_if_fail (uri != NULL, NULL);
1996
1997 return g_uri_to_string_partial (uri, G_URI_HIDE_NONE);
1998 }
1999
2000 /**
2001 * g_uri_to_string_partial:
2002 * @uri: a #GUri
2003 * @flags: flags describing what parts of @uri to hide
2004 *
2005 * Returns a string representing @uri, subject to the options in
2006 * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
2007 *
2008 * Return value: (not nullable) (transfer full): a string representing
2009 * @uri, which the caller must free.
2010 *
2011 * Since: 2.66
2012 */
2013 gchar *
g_uri_to_string_partial(GUri * uri,GUriHideFlags flags)2014 g_uri_to_string_partial (GUri *uri,
2015 GUriHideFlags flags)
2016 {
2017 gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
2018 gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
2019 gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
2020 gboolean hide_query = (flags & G_URI_HIDE_QUERY);
2021 gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
2022
2023 g_return_val_if_fail (uri != NULL, NULL);
2024
2025 if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
2026 {
2027 return g_uri_join_with_user (uri->flags,
2028 uri->scheme,
2029 hide_user ? NULL : uri->user,
2030 hide_password ? NULL : uri->password,
2031 hide_auth_params ? NULL : uri->auth_params,
2032 uri->host,
2033 uri->port,
2034 uri->path,
2035 hide_query ? NULL : uri->query,
2036 hide_fragment ? NULL : uri->fragment);
2037 }
2038
2039 return g_uri_join (uri->flags,
2040 uri->scheme,
2041 hide_user ? NULL : uri->userinfo,
2042 uri->host,
2043 uri->port,
2044 uri->path,
2045 hide_query ? NULL : uri->query,
2046 hide_fragment ? NULL : uri->fragment);
2047 }
2048
2049 /* This is just a copy of g_str_hash() with g_ascii_toupper() added */
2050 static guint
str_ascii_case_hash(gconstpointer v)2051 str_ascii_case_hash (gconstpointer v)
2052 {
2053 const signed char *p;
2054 guint32 h = 5381;
2055
2056 for (p = v; *p != '\0'; p++)
2057 h = (h << 5) + h + g_ascii_toupper (*p);
2058
2059 return h;
2060 }
2061
2062 static gboolean
str_ascii_case_equal(gconstpointer v1,gconstpointer v2)2063 str_ascii_case_equal (gconstpointer v1,
2064 gconstpointer v2)
2065 {
2066 const gchar *string1 = v1;
2067 const gchar *string2 = v2;
2068
2069 return g_ascii_strcasecmp (string1, string2) == 0;
2070 }
2071
/**
 * GUriParamsIter:
 *
 * Many URI schemes include one or more attribute/value pairs as part of the URI
 * value. For example `scheme://server/path?query=string&is=there` has two
 * attributes – `query=string` and `is=there` – in its query part.
 *
 * A #GUriParamsIter structure represents an iterator that can be used to
 * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
 * structures are typically allocated on the stack and then initialized with
 * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
 * for a usage example.
 *
 * Since: 2.66
 */

/* Private layout of a #GUriParamsIter: the iterator functions cast the
 * public pointer to this type. The static assertions below check that it
 * has the same size as the public struct and no stricter alignment. */
typedef struct
{
  GUriParamsFlags flags;   /* flags passed to g_uri_params_iter_init() */
  const gchar *attr;       /* start of the next attribute/value pair to parse */
  const gchar *end;        /* one past the last byte of the params string */
  guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
} RealIter;

G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
2097
2098 /**
2099 * g_uri_params_iter_init:
2100 * @iter: an uninitialized #GUriParamsIter
2101 * @params: a `%`-encoded string containing `attribute=value`
2102 * parameters
2103 * @length: the length of @params, or `-1` if it is nul-terminated
2104 * @separators: the separator byte character set between parameters. (usually
2105 * `&`, but sometimes `;` or both `&;`). Note that this function works on
2106 * bytes not characters, so it can't be used to delimit UTF-8 strings for
2107 * anything but ASCII characters. You may pass an empty set, in which case
2108 * no splitting will occur.
2109 * @flags: flags to modify the way the parameters are handled.
2110 *
2111 * Initializes an attribute/value pair iterator.
2112 *
2113 * The iterator keeps pointers to the @params and @separators arguments, those
2114 * variables must thus outlive the iterator and not be modified during the
2115 * iteration.
2116 *
2117 * If %G_URI_PARAMS_WWW_FORM is passed in @flags, `+` characters in the param
2118 * string will be replaced with spaces in the output. For example, `foo=bar+baz`
2119 * will give attribute `foo` with value `bar baz`. This is commonly used on the
2120 * web (the `https` and `http` schemes only), but is deprecated in favour of
2121 * the equivalent of encoding spaces as `%20`.
2122 *
2123 * Unlike with g_uri_parse_params(), %G_URI_PARAMS_CASE_INSENSITIVE has no
2124 * effect if passed to @flags for g_uri_params_iter_init(). The caller is
2125 * responsible for doing their own case-insensitive comparisons.
2126 *
2127 * |[<!-- language="C" -->
2128 * GUriParamsIter iter;
2129 * GError *error = NULL;
2130 * gchar *unowned_attr, *unowned_value;
2131 *
2132 * g_uri_params_iter_init (&iter, "foo=bar&baz=bar&Foo=frob&baz=bar2", -1, "&", G_URI_PARAMS_NONE);
2133 * while (g_uri_params_iter_next (&iter, &unowned_attr, &unowned_value, &error))
2134 * {
2135 * g_autofree gchar *attr = g_steal_pointer (&unowned_attr);
2136 * g_autofree gchar *value = g_steal_pointer (&unowned_value);
2137 * // do something with attr and value; this code will be called 4 times
2138 * // for the params string in this example: once with attr=foo and value=bar,
2139 * // then with baz/bar, then Foo/frob, then baz/bar2.
2140 * }
2141 * if (error)
2142 * // handle parsing error
2143 * ]|
2144 *
2145 * Since: 2.66
2146 */
2147 void
g_uri_params_iter_init(GUriParamsIter * iter,const gchar * params,gssize length,const gchar * separators,GUriParamsFlags flags)2148 g_uri_params_iter_init (GUriParamsIter *iter,
2149 const gchar *params,
2150 gssize length,
2151 const gchar *separators,
2152 GUriParamsFlags flags)
2153 {
2154 RealIter *ri = (RealIter *)iter;
2155 const gchar *s;
2156
2157 g_return_if_fail (iter != NULL);
2158 g_return_if_fail (length == 0 || params != NULL);
2159 g_return_if_fail (length >= -1);
2160 g_return_if_fail (separators != NULL);
2161
2162 ri->flags = flags;
2163
2164 if (length == -1)
2165 ri->end = params + strlen (params);
2166 else
2167 ri->end = params + length;
2168
2169 memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
2170 for (s = separators; *s != '\0'; ++s)
2171 ri->sep_table[*(guchar *)s] = TRUE;
2172
2173 ri->attr = params;
2174 }
2175
2176 /**
2177 * g_uri_params_iter_next:
2178 * @iter: an initialized #GUriParamsIter
2179 * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
2180 * the attribute, or %NULL.
2181 * @value: (out) (nullable) (optional) (transfer full): on return, contains
2182 * the value, or %NULL.
2183 * @error: #GError for error reporting, or %NULL to ignore.
2184 *
2185 * Advances @iter and retrieves the next attribute/value. %FALSE is returned if
2186 * an error has occurred (in which case @error is set), or if the end of the
2187 * iteration is reached (in which case @attribute and @value are set to %NULL
2188 * and the iterator becomes invalid). If %TRUE is returned,
2189 * g_uri_params_iter_next() may be called again to receive another
2190 * attribute/value pair.
2191 *
2192 * Note that the same @attribute may be returned multiple times, since URIs
2193 * allow repeated attributes.
2194 *
2195 * Returns: %FALSE if the end of the parameters has been reached or an error was
2196 * encountered. %TRUE otherwise.
2197 *
2198 * Since: 2.66
2199 */
2200 gboolean
g_uri_params_iter_next(GUriParamsIter * iter,gchar ** attribute,gchar ** value,GError ** error)2201 g_uri_params_iter_next (GUriParamsIter *iter,
2202 gchar **attribute,
2203 gchar **value,
2204 GError **error)
2205 {
2206 RealIter *ri = (RealIter *)iter;
2207 const gchar *attr_end, *val, *val_end;
2208 gchar *decoded_attr, *decoded_value;
2209 gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
2210 GUriFlags decode_flags = G_URI_FLAGS_NONE;
2211
2212 g_return_val_if_fail (iter != NULL, FALSE);
2213 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2214
2215 /* Pre-clear these in case of failure or finishing. */
2216 if (attribute)
2217 *attribute = NULL;
2218 if (value)
2219 *value = NULL;
2220
2221 if (ri->attr >= ri->end)
2222 return FALSE;
2223
2224 if (ri->flags & G_URI_PARAMS_PARSE_RELAXED)
2225 decode_flags |= G_URI_FLAGS_PARSE_RELAXED;
2226
2227 /* Check if each character in @attr is a separator, by indexing by the
2228 * character value into the @sep_table, which has value 1 stored at an
2229 * index if that index is a separator. */
2230 for (val_end = ri->attr; val_end < ri->end; val_end++)
2231 if (ri->sep_table[*(guchar *)val_end])
2232 break;
2233
2234 attr_end = memchr (ri->attr, '=', val_end - ri->attr);
2235 if (!attr_end)
2236 {
2237 g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
2238 _("Missing ‘=’ and parameter value"));
2239 return FALSE;
2240 }
2241 if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
2242 www_form, decode_flags, G_URI_ERROR_FAILED, error))
2243 {
2244 return FALSE;
2245 }
2246
2247 val = attr_end + 1;
2248 if (!uri_decode (&decoded_value, NULL, val, val_end - val,
2249 www_form, decode_flags, G_URI_ERROR_FAILED, error))
2250 {
2251 g_free (decoded_attr);
2252 return FALSE;
2253 }
2254
2255 if (attribute)
2256 *attribute = g_steal_pointer (&decoded_attr);
2257 if (value)
2258 *value = g_steal_pointer (&decoded_value);
2259
2260 g_free (decoded_attr);
2261 g_free (decoded_value);
2262
2263 ri->attr = val_end + 1;
2264 return TRUE;
2265 }
2266
2267 /**
2268 * g_uri_parse_params:
2269 * @params: a `%`-encoded string containing `attribute=value`
2270 * parameters
2271 * @length: the length of @params, or `-1` if it is nul-terminated
2272 * @separators: the separator byte character set between parameters. (usually
2273 * `&`, but sometimes `;` or both `&;`). Note that this function works on
2274 * bytes not characters, so it can't be used to delimit UTF-8 strings for
2275 * anything but ASCII characters. You may pass an empty set, in which case
2276 * no splitting will occur.
2277 * @flags: flags to modify the way the parameters are handled.
2278 * @error: #GError for error reporting, or %NULL to ignore.
2279 *
2280 * Many URI schemes include one or more attribute/value pairs as part of the URI
2281 * value. This method can be used to parse them into a hash table. When an
2282 * attribute has multiple occurrences, the last value is the final returned
2283 * value. If you need to handle repeated attributes differently, use
2284 * #GUriParamsIter.
2285 *
2286 * The @params string is assumed to still be `%`-encoded, but the returned
2287 * values will be fully decoded. (Thus it is possible that the returned values
2288 * may contain `=` or @separators, if the value was encoded in the input.)
2289 * Invalid `%`-encoding is treated as with the %G_URI_FLAGS_PARSE_RELAXED
2290 * rules for g_uri_parse(). (However, if @params is the path or query string
2291 * from a #GUri that was parsed without %G_URI_FLAGS_PARSE_RELAXED and
2292 * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
2293 * invalid encoding.)
2294 *
2295 * %G_URI_PARAMS_WWW_FORM is handled as documented for g_uri_params_iter_init().
2296 *
2297 * If %G_URI_PARAMS_CASE_INSENSITIVE is passed to @flags, attributes will be
2298 * compared case-insensitively, so a params string `attr=123&Attr=456` will only
2299 * return a single attribute–value pair, `Attr=456`. Case will be preserved in
2300 * the returned attributes.
2301 *
2302 * If @params cannot be parsed (for example, it contains two @separators
2303 * characters in a row), then @error is set and %NULL is returned.
2304 *
2305 * Return value: (transfer full) (element-type utf8 utf8):
2306 * A hash table of attribute/value pairs, with both names and values
2307 * fully-decoded; or %NULL on error.
2308 *
2309 * Since: 2.66
2310 */
2311 GHashTable *
g_uri_parse_params(const gchar * params,gssize length,const gchar * separators,GUriParamsFlags flags,GError ** error)2312 g_uri_parse_params (const gchar *params,
2313 gssize length,
2314 const gchar *separators,
2315 GUriParamsFlags flags,
2316 GError **error)
2317 {
2318 GHashTable *hash;
2319 GUriParamsIter iter;
2320 gchar *attribute, *value;
2321 GError *err = NULL;
2322
2323 g_return_val_if_fail (length == 0 || params != NULL, NULL);
2324 g_return_val_if_fail (length >= -1, NULL);
2325 g_return_val_if_fail (separators != NULL, NULL);
2326 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2327
2328 if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
2329 {
2330 hash = g_hash_table_new_full (str_ascii_case_hash,
2331 str_ascii_case_equal,
2332 g_free, g_free);
2333 }
2334 else
2335 {
2336 hash = g_hash_table_new_full (g_str_hash, g_str_equal,
2337 g_free, g_free);
2338 }
2339
2340 g_uri_params_iter_init (&iter, params, length, separators, flags);
2341
2342 while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
2343 g_hash_table_insert (hash, attribute, value);
2344
2345 if (err)
2346 {
2347 g_propagate_error (error, g_steal_pointer (&err));
2348 g_hash_table_destroy (hash);
2349 return NULL;
2350 }
2351
2352 return g_steal_pointer (&hash);
2353 }
2354
2355 /**
2356 * g_uri_get_scheme:
2357 * @uri: a #GUri
2358 *
2359 * Gets @uri's scheme. Note that this will always be all-lowercase,
2360 * regardless of the string or strings that @uri was created from.
2361 *
2362 * Return value: (not nullable): @uri's scheme.
2363 *
2364 * Since: 2.66
2365 */
2366 const gchar *
g_uri_get_scheme(GUri * uri)2367 g_uri_get_scheme (GUri *uri)
2368 {
2369 g_return_val_if_fail (uri != NULL, NULL);
2370
2371 return uri->scheme;
2372 }
2373
2374 /**
2375 * g_uri_get_userinfo:
2376 * @uri: a #GUri
2377 *
2378 * Gets @uri's userinfo, which may contain `%`-encoding, depending on
2379 * the flags with which @uri was created.
2380 *
2381 * Return value: (nullable): @uri's userinfo.
2382 *
2383 * Since: 2.66
2384 */
2385 const gchar *
g_uri_get_userinfo(GUri * uri)2386 g_uri_get_userinfo (GUri *uri)
2387 {
2388 g_return_val_if_fail (uri != NULL, NULL);
2389
2390 return uri->userinfo;
2391 }
2392
2393 /**
2394 * g_uri_get_user:
2395 * @uri: a #GUri
2396 *
2397 * Gets the ‘username’ component of @uri's userinfo, which may contain
2398 * `%`-encoding, depending on the flags with which @uri was created.
2399 * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or
2400 * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
2401 *
2402 * Return value: (nullable): @uri's user.
2403 *
2404 * Since: 2.66
2405 */
2406 const gchar *
g_uri_get_user(GUri * uri)2407 g_uri_get_user (GUri *uri)
2408 {
2409 g_return_val_if_fail (uri != NULL, NULL);
2410
2411 return uri->user;
2412 }
2413
2414 /**
2415 * g_uri_get_password:
2416 * @uri: a #GUri
2417 *
2418 * Gets @uri's password, which may contain `%`-encoding, depending on
2419 * the flags with which @uri was created. (If @uri was not created
2420 * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.)
2421 *
2422 * Return value: (nullable): @uri's password.
2423 *
2424 * Since: 2.66
2425 */
2426 const gchar *
g_uri_get_password(GUri * uri)2427 g_uri_get_password (GUri *uri)
2428 {
2429 g_return_val_if_fail (uri != NULL, NULL);
2430
2431 return uri->password;
2432 }
2433
2434 /**
2435 * g_uri_get_auth_params:
2436 * @uri: a #GUri
2437 *
2438 * Gets @uri's authentication parameters, which may contain
2439 * `%`-encoding, depending on the flags with which @uri was created.
2440 * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will
2441 * be %NULL.)
2442 *
2443 * Depending on the URI scheme, g_uri_parse_params() may be useful for
2444 * further parsing this information.
2445 *
2446 * Return value: (nullable): @uri's authentication parameters.
2447 *
2448 * Since: 2.66
2449 */
2450 const gchar *
g_uri_get_auth_params(GUri * uri)2451 g_uri_get_auth_params (GUri *uri)
2452 {
2453 g_return_val_if_fail (uri != NULL, NULL);
2454
2455 return uri->auth_params;
2456 }
2457
2458 /**
2459 * g_uri_get_host:
2460 * @uri: a #GUri
2461 *
2462 * Gets @uri's host. This will never have `%`-encoded characters,
2463 * unless it is non-UTF-8 (which can only be the case if @uri was
2464 * created with %G_URI_FLAGS_NON_DNS).
2465 *
2466 * If @uri contained an IPv6 address literal, this value will be just
2467 * that address, without the brackets around it that are necessary in
2468 * the string form of the URI. Note that in this case there may also
2469 * be a scope ID attached to the address. Eg, `fe80::1234%``em1` (or
2470 * `fe80::1234%``25em1` if the string is still encoded).
2471 *
2472 * Return value: (nullable): @uri's host.
2473 *
2474 * Since: 2.66
2475 */
2476 const gchar *
g_uri_get_host(GUri * uri)2477 g_uri_get_host (GUri *uri)
2478 {
2479 g_return_val_if_fail (uri != NULL, NULL);
2480
2481 return uri->host;
2482 }
2483
2484 /**
2485 * g_uri_get_port:
2486 * @uri: a #GUri
2487 *
2488 * Gets @uri's port.
2489 *
2490 * Return value: @uri's port, or `-1` if no port was specified.
2491 *
2492 * Since: 2.66
2493 */
2494 gint
g_uri_get_port(GUri * uri)2495 g_uri_get_port (GUri *uri)
2496 {
2497 g_return_val_if_fail (uri != NULL, -1);
2498
2499 if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
2500 return default_scheme_port (uri->scheme);
2501
2502 return uri->port;
2503 }
2504
2505 /**
2506 * g_uri_get_path:
2507 * @uri: a #GUri
2508 *
2509 * Gets @uri's path, which may contain `%`-encoding, depending on the
2510 * flags with which @uri was created.
2511 *
2512 * Return value: (not nullable): @uri's path.
2513 *
2514 * Since: 2.66
2515 */
2516 const gchar *
g_uri_get_path(GUri * uri)2517 g_uri_get_path (GUri *uri)
2518 {
2519 g_return_val_if_fail (uri != NULL, NULL);
2520
2521 return uri->path;
2522 }
2523
2524 /**
2525 * g_uri_get_query:
2526 * @uri: a #GUri
2527 *
2528 * Gets @uri's query, which may contain `%`-encoding, depending on the
2529 * flags with which @uri was created.
2530 *
2531 * For queries consisting of a series of `name=value` parameters,
2532 * #GUriParamsIter or g_uri_parse_params() may be useful.
2533 *
2534 * Return value: (nullable): @uri's query.
2535 *
2536 * Since: 2.66
2537 */
2538 const gchar *
g_uri_get_query(GUri * uri)2539 g_uri_get_query (GUri *uri)
2540 {
2541 g_return_val_if_fail (uri != NULL, NULL);
2542
2543 return uri->query;
2544 }
2545
2546 /**
2547 * g_uri_get_fragment:
2548 * @uri: a #GUri
2549 *
2550 * Gets @uri's fragment, which may contain `%`-encoding, depending on
2551 * the flags with which @uri was created.
2552 *
2553 * Return value: (nullable): @uri's fragment.
2554 *
2555 * Since: 2.66
2556 */
2557 const gchar *
g_uri_get_fragment(GUri * uri)2558 g_uri_get_fragment (GUri *uri)
2559 {
2560 g_return_val_if_fail (uri != NULL, NULL);
2561
2562 return uri->fragment;
2563 }
2564
2565
2566 /**
2567 * g_uri_get_flags:
2568 * @uri: a #GUri
2569 *
2570 * Gets @uri's flags set upon construction.
2571 *
2572 * Return value: @uri's flags.
2573 *
2574 * Since: 2.66
2575 **/
2576 GUriFlags
g_uri_get_flags(GUri * uri)2577 g_uri_get_flags (GUri *uri)
2578 {
2579 g_return_val_if_fail (uri != NULL, G_URI_FLAGS_NONE);
2580
2581 return uri->flags;
2582 }
2583
2584 /**
2585 * g_uri_unescape_segment:
2586 * @escaped_string: (nullable): A string, may be %NULL
2587 * @escaped_string_end: (nullable): Pointer to end of @escaped_string,
2588 * may be %NULL
2589 * @illegal_characters: (nullable): An optional string of illegal
2590 * characters not to be allowed, may be %NULL
2591 *
2592 * Unescapes a segment of an escaped string.
2593 *
2594 * If any of the characters in @illegal_characters or the NUL
2595 * character appears as an escaped character in @escaped_string, then
2596 * that is an error and %NULL will be returned. This is useful if you
2597 * want to avoid for instance having a slash being expanded in an
2598 * escaped path element, which might confuse pathname handling.
2599 *
2600 * Note: `NUL` byte is not accepted in the output, in contrast to
2601 * g_uri_unescape_bytes().
2602 *
2603 * Returns: (nullable): an unescaped version of @escaped_string,
2604 * or %NULL on error. The returned string should be freed when no longer
2605 * needed. As a special case if %NULL is given for @escaped_string, this
2606 * function will return %NULL.
2607 *
2608 * Since: 2.16
2609 **/
2610 gchar *
g_uri_unescape_segment(const gchar * escaped_string,const gchar * escaped_string_end,const gchar * illegal_characters)2611 g_uri_unescape_segment (const gchar *escaped_string,
2612 const gchar *escaped_string_end,
2613 const gchar *illegal_characters)
2614 {
2615 gchar *unescaped;
2616 gsize length;
2617 gssize decoded_len;
2618
2619 if (!escaped_string)
2620 return NULL;
2621
2622 if (escaped_string_end)
2623 length = escaped_string_end - escaped_string;
2624 else
2625 length = strlen (escaped_string);
2626
2627 decoded_len = uri_decoder (&unescaped,
2628 illegal_characters,
2629 escaped_string, length,
2630 FALSE, FALSE,
2631 G_URI_FLAGS_ENCODED,
2632 0, NULL);
2633 if (decoded_len < 0)
2634 return NULL;
2635
2636 if (memchr (unescaped, '\0', decoded_len))
2637 {
2638 g_free (unescaped);
2639 return NULL;
2640 }
2641
2642 return unescaped;
2643 }
2644
2645 /**
2646 * g_uri_unescape_string:
2647 * @escaped_string: an escaped string to be unescaped.
2648 * @illegal_characters: (nullable): a string of illegal characters
2649 * not to be allowed, or %NULL.
2650 *
2651 * Unescapes a whole escaped string.
2652 *
2653 * If any of the characters in @illegal_characters or the NUL
2654 * character appears as an escaped character in @escaped_string, then
2655 * that is an error and %NULL will be returned. This is useful if you
2656 * want to avoid for instance having a slash being expanded in an
2657 * escaped path element, which might confuse pathname handling.
2658 *
2659 * Returns: (nullable): an unescaped version of @escaped_string.
2660 * The returned string should be freed when no longer needed.
2661 *
2662 * Since: 2.16
2663 **/
2664 gchar *
g_uri_unescape_string(const gchar * escaped_string,const gchar * illegal_characters)2665 g_uri_unescape_string (const gchar *escaped_string,
2666 const gchar *illegal_characters)
2667 {
2668 return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
2669 }
2670
2671 /**
2672 * g_uri_escape_string:
2673 * @unescaped: the unescaped input string.
2674 * @reserved_chars_allowed: (nullable): a string of reserved
2675 * characters that are allowed to be used, or %NULL.
2676 * @allow_utf8: %TRUE if the result can include UTF-8 characters.
2677 *
2678 * Escapes a string for use in a URI.
2679 *
2680 * Normally all characters that are not "unreserved" (i.e. ASCII
2681 * alphanumerical characters plus dash, dot, underscore and tilde) are
2682 * escaped. But if you specify characters in @reserved_chars_allowed
2683 * they are not escaped. This is useful for the "reserved" characters
2684 * in the URI specification, since those are allowed unescaped in some
2685 * portions of a URI.
2686 *
2687 * Returns: (not nullable): an escaped version of @unescaped. The
2688 * returned string should be freed when no longer needed.
2689 *
2690 * Since: 2.16
2691 **/
2692 gchar *
g_uri_escape_string(const gchar * unescaped,const gchar * reserved_chars_allowed,gboolean allow_utf8)2693 g_uri_escape_string (const gchar *unescaped,
2694 const gchar *reserved_chars_allowed,
2695 gboolean allow_utf8)
2696 {
2697 GString *s;
2698
2699 g_return_val_if_fail (unescaped != NULL, NULL);
2700
2701 s = g_string_sized_new (strlen (unescaped) * 1.25);
2702
2703 g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
2704
2705 return g_string_free (s, FALSE);
2706 }
2707
2708 /**
2709 * g_uri_unescape_bytes:
2710 * @escaped_string: A URI-escaped string
2711 * @length: the length (in bytes) of @escaped_string to escape, or `-1` if it
2712 * is nul-terminated.
2713 * @illegal_characters: (nullable): a string of illegal characters
2714 * not to be allowed, or %NULL.
2715 * @error: #GError for error reporting, or %NULL to ignore.
2716 *
2717 * Unescapes a segment of an escaped string as binary data.
2718 *
2719 * Note that in contrast to g_uri_unescape_string(), this does allow
2720 * nul bytes to appear in the output.
2721 *
2722 * If any of the characters in @illegal_characters appears as an escaped
2723 * character in @escaped_string, then that is an error and %NULL will be
2724 * returned. This is useful if you want to avoid for instance having a slash
2725 * being expanded in an escaped path element, which might confuse pathname
2726 * handling.
2727 *
2728 * Returns: (transfer full): an unescaped version of @escaped_string
2729 * or %NULL on error (if decoding failed, using %G_URI_ERROR_FAILED error
2730 * code). The returned #GBytes should be unreffed when no longer needed.
2731 *
2732 * Since: 2.66
2733 **/
2734 GBytes *
g_uri_unescape_bytes(const gchar * escaped_string,gssize length,const char * illegal_characters,GError ** error)2735 g_uri_unescape_bytes (const gchar *escaped_string,
2736 gssize length,
2737 const char *illegal_characters,
2738 GError **error)
2739 {
2740 gchar *buf;
2741 gssize unescaped_length;
2742
2743 g_return_val_if_fail (escaped_string != NULL, NULL);
2744 g_return_val_if_fail (error == NULL || *error == NULL, NULL);
2745
2746 if (length == -1)
2747 length = strlen (escaped_string);
2748
2749 unescaped_length = uri_decoder (&buf,
2750 illegal_characters,
2751 escaped_string, length,
2752 FALSE,
2753 FALSE,
2754 G_URI_FLAGS_ENCODED,
2755 G_URI_ERROR_FAILED, error);
2756 if (unescaped_length == -1)
2757 return NULL;
2758
2759 return g_bytes_new_take (buf, unescaped_length);
2760 }
2761
2762 /**
2763 * g_uri_escape_bytes:
2764 * @unescaped: (array length=length): the unescaped input data.
2765 * @length: the length of @unescaped
2766 * @reserved_chars_allowed: (nullable): a string of reserved
2767 * characters that are allowed to be used, or %NULL.
2768 *
2769 * Escapes arbitrary data for use in a URI.
2770 *
2771 * Normally all characters that are not ‘unreserved’ (i.e. ASCII
2772 * alphanumerical characters plus dash, dot, underscore and tilde) are
2773 * escaped. But if you specify characters in @reserved_chars_allowed
2774 * they are not escaped. This is useful for the ‘reserved’ characters
2775 * in the URI specification, since those are allowed unescaped in some
2776 * portions of a URI.
2777 *
2778 * Though technically incorrect, this will also allow escaping nul
2779 * bytes as `%``00`.
2780 *
2781 * Returns: (not nullable) (transfer full): an escaped version of @unescaped.
2782 * The returned string should be freed when no longer needed.
2783 *
2784 * Since: 2.66
2785 */
2786 gchar *
g_uri_escape_bytes(const guint8 * unescaped,gsize length,const gchar * reserved_chars_allowed)2787 g_uri_escape_bytes (const guint8 *unescaped,
2788 gsize length,
2789 const gchar *reserved_chars_allowed)
2790 {
2791 GString *string;
2792
2793 g_return_val_if_fail (unescaped != NULL, NULL);
2794
2795 string = g_string_sized_new (length * 1.25);
2796
2797 _uri_encoder (string, unescaped, length,
2798 reserved_chars_allowed, FALSE);
2799
2800 return g_string_free (string, FALSE);
2801 }
2802
2803 static gssize
g_uri_scheme_length(const gchar * uri)2804 g_uri_scheme_length (const gchar *uri)
2805 {
2806 const gchar *p;
2807
2808 p = uri;
2809 if (!g_ascii_isalpha (*p))
2810 return -1;
2811 p++;
2812 while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-')
2813 p++;
2814
2815 if (p > uri && *p == ':')
2816 return p - uri;
2817
2818 return -1;
2819 }
2820
2821 /**
2822 * g_uri_parse_scheme:
2823 * @uri: a valid URI.
2824 *
2825 * Gets the scheme portion of a URI string.
2826 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2827 * as:
2828 * |[
2829 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2830 * ]|
2831 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2832 *
2833 * Returns: (transfer full) (nullable): The ‘scheme’ component of the URI, or
2834 * %NULL on error. The returned string should be freed when no longer needed.
2835 *
2836 * Since: 2.16
2837 **/
2838 gchar *
g_uri_parse_scheme(const gchar * uri)2839 g_uri_parse_scheme (const gchar *uri)
2840 {
2841 gssize len;
2842
2843 g_return_val_if_fail (uri != NULL, NULL);
2844
2845 len = g_uri_scheme_length (uri);
2846 return len == -1 ? NULL : g_strndup (uri, len);
2847 }
2848
2849 /**
2850 * g_uri_peek_scheme:
2851 * @uri: a valid URI.
2852 *
2853 * Gets the scheme portion of a URI string.
2854 * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2855 * as:
2856 * |[
2857 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2858 * ]|
2859 * Common schemes include `file`, `https`, `svn+ssh`, etc.
2860 *
2861 * Unlike g_uri_parse_scheme(), the returned scheme is normalized to
2862 * all-lowercase and does not need to be freed.
2863 *
2864 * Returns: (transfer none) (nullable): The ‘scheme’ component of the URI, or
2865 * %NULL on error. The returned string is normalized to all-lowercase, and
2866 * interned via g_intern_string(), so it does not need to be freed.
2867 *
2868 * Since: 2.66
2869 **/
2870 const gchar *
g_uri_peek_scheme(const gchar * uri)2871 g_uri_peek_scheme (const gchar *uri)
2872 {
2873 gssize len;
2874 gchar *lower_scheme;
2875 const gchar *scheme;
2876
2877 g_return_val_if_fail (uri != NULL, NULL);
2878
2879 len = g_uri_scheme_length (uri);
2880 if (len == -1)
2881 return NULL;
2882
2883 lower_scheme = g_ascii_strdown (uri, len);
2884 scheme = g_intern_string (lower_scheme);
2885 g_free (lower_scheme);
2886
2887 return scheme;
2888 }
2889
/* Defines g_uri_error_quark(), which returns the #GQuark for the string
 * "g-uri-quark". NOTE(review): presumably this is what %G_URI_ERROR, the
 * error domain used in this file, expands to; confirm in the header. */
G_DEFINE_QUARK (g-uri-quark, g_uri_error)
2891