1 /* GLIB - Library of useful routines for C programming
2  * Copyright © 2020 Red Hat, Inc.
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General
15  * Public License along with this library; if not, see
16  * <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "config.h"
20 
21 #include <stdlib.h>
22 #include <string.h>
23 
24 #include "glib.h"
25 #include "glibintl.h"
26 #include "guriprivate.h"
27 
28 /**
29  * SECTION:guri
30  * @short_description: URI-handling utilities
31  * @include: glib.h
32  *
33  * The #GUri type and related functions can be used to parse URIs into
34  * their components, and build valid URIs from individual components.
35  *
36  * Note that #GUri scope is to help manipulate URIs in various applications,
37  * following [RFC 3986](https://tools.ietf.org/html/rfc3986). In particular,
38  * it doesn't intend to cover web browser needs, and doesn't implement the
39  * [WHATWG URL](https://url.spec.whatwg.org/) standard. No APIs are provided to
40  * help prevent
41  * [homograph attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack), so
42  * #GUri is not suitable for formatting URIs for display to the user for making
43  * security-sensitive decisions.
44  *
45  * ## Relative and absolute URIs # {#relative-absolute-uris}
46  *
47  * As defined in [RFC 3986](https://tools.ietf.org/html/rfc3986#section-4), the
48  * hierarchical nature of URIs means that they can either be ‘relative
49  * references’ (sometimes referred to as ‘relative URIs’) or ‘URIs’ (for
50  * clarity, ‘URIs’ are referred to in this documentation as
51  * ‘absolute URIs’ — although
 * [in contrast to RFC 3986](https://tools.ietf.org/html/rfc3986#section-4.3),
53  * fragment identifiers are always allowed).
54  *
55  * Relative references have one or more components of the URI missing. In
56  * particular, they have no scheme. Any other component, such as hostname,
57  * query, etc. may be missing, apart from a path, which has to be specified (but
58  * may be empty). The path may be relative, starting with `./` rather than `/`.
59  *
60  * For example, a valid relative reference is `./path?query`,
61  * `/?query#fragment` or `//example.com`.
62  *
63  * Absolute URIs have a scheme specified. Any other components of the URI which
64  * are missing are specified as explicitly unset in the URI, rather than being
65  * resolved relative to a base URI using g_uri_parse_relative().
66  *
67  * For example, a valid absolute URI is `file:///home/bob` or
68  * `https://search.com?query=string`.
69  *
70  * A #GUri instance is always an absolute URI. A string may be an absolute URI
71  * or a relative reference; see the documentation for individual functions as to
72  * what forms they accept.
73  *
74  * ## Parsing URIs
75  *
76  * The most minimalist APIs for parsing URIs are g_uri_split() and
77  * g_uri_split_with_user(). These split a URI into its component
78  * parts, and return the parts; the difference between the two is that
79  * g_uri_split() treats the ‘userinfo’ component of the URI as a
80  * single element, while g_uri_split_with_user() can (depending on the
81  * #GUriFlags you pass) treat it as containing a username, password,
82  * and authentication parameters. Alternatively, g_uri_split_network()
83  * can be used when you are only interested in the components that are
84  * needed to initiate a network connection to the service (scheme,
85  * host, and port).
86  *
87  * g_uri_parse() is similar to g_uri_split(), but instead of returning
88  * individual strings, it returns a #GUri structure (and it requires
89  * that the URI be an absolute URI).
90  *
91  * g_uri_resolve_relative() and g_uri_parse_relative() allow you to
92  * resolve a relative URI relative to a base URI.
93  * g_uri_resolve_relative() takes two strings and returns a string,
94  * and g_uri_parse_relative() takes a #GUri and a string and returns a
95  * #GUri.
96  *
97  * All of the parsing functions take a #GUriFlags argument describing
98  * exactly how to parse the URI; see the documentation for that type
99  * for more details on the specific flags that you can pass. If you
100  * need to choose different flags based on the type of URI, you can
101  * use g_uri_peek_scheme() on the URI string to check the scheme
102  * first, and use that to decide what flags to parse it with.
103  *
104  * For example, you might want to use %G_URI_PARAMS_WWW_FORM when parsing the
105  * params for a web URI, so compare the result of g_uri_peek_scheme() against
106  * `http` and `https`.
107  *
108  * ## Building URIs
109  *
110  * g_uri_join() and g_uri_join_with_user() can be used to construct
111  * valid URI strings from a set of component strings. They are the
112  * inverse of g_uri_split() and g_uri_split_with_user().
113  *
114  * Similarly, g_uri_build() and g_uri_build_with_user() can be used to
115  * construct a #GUri from a set of component strings.
116  *
117  * As with the parsing functions, the building functions take a
118  * #GUriFlags argument. In particular, it is important to keep in mind
119  * whether the URI components you are using are already `%`-encoded. If so,
120  * you must pass the %G_URI_FLAGS_ENCODED flag.
121  *
122  * ## `file://` URIs
123  *
124  * Note that Windows and Unix both define special rules for parsing
125  * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
126  * interpretation of path separators on Windows). #GUri does not
127  * implement these rules. Use g_filename_from_uri() and
128  * g_filename_to_uri() if you want to properly convert between
129  * `file://` URIs and local filenames.
130  *
131  * ## URI Equality
132  *
133  * Note that there is no `g_uri_equal ()` function, because comparing
134  * URIs usefully requires scheme-specific knowledge that #GUri does
135  * not have. #GUri can help with normalization if you use the various
 * encoded #GUriFlags as well as %G_URI_FLAGS_SCHEME_NORMALIZE; however,
 * it is not comprehensive.
138  * For example, `data:,foo` and `data:;base64,Zm9v` resolve to the same
139  * thing according to the `data:` URI specification which GLib does not
140  * handle.
141  *
142  * Since: 2.66
143  */
144 
145 /**
146  * GUri:
147  *
148  * A parsed absolute URI.
149  *
150  * Since #GUri only represents absolute URIs, all #GUris will have a
151  * URI scheme, so g_uri_get_scheme() will always return a non-%NULL
152  * answer. Likewise, by definition, all URIs have a path component, so
153  * g_uri_get_path() will always return a non-%NULL string (which may be empty).
154  *
155  * If the URI string has an
156  * [‘authority’ component](https://tools.ietf.org/html/rfc3986#section-3) (that
157  * is, if the scheme is followed by `://` rather than just `:`), then the
158  * #GUri will contain a hostname, and possibly a port and ‘userinfo’.
159  * Additionally, depending on how the #GUri was constructed/parsed (for example,
160  * using the %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS flags),
161  * the userinfo may be split out into a username, password, and
162  * additional authorization-related parameters.
163  *
164  * Normally, the components of a #GUri will have all `%`-encoded
165  * characters decoded. However, if you construct/parse a #GUri with
166  * %G_URI_FLAGS_ENCODED, then the `%`-encoding will be preserved instead in
167  * the userinfo, path, and query fields (and in the host field if also
168  * created with %G_URI_FLAGS_NON_DNS). In particular, this is necessary if
169  * the URI may contain binary data or non-UTF-8 text, or if decoding
170  * the components might change the interpretation of the URI.
171  *
172  * For example, with the encoded flag:
173  *
174  * |[<!-- language="C" -->
175  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_ENCODED, &err);
176  *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue");
177  * ]|
178  *
179  * While the default `%`-decoding behaviour would give:
180  *
181  * |[<!-- language="C" -->
182  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fparam%3Dvalue", G_URI_FLAGS_NONE, &err);
183  *   g_assert_cmpstr (g_uri_get_query (uri), ==, "query=http://host/path?param=value");
184  * ]|
185  *
186  * During decoding, if an invalid UTF-8 string is encountered, parsing will fail
187  * with an error indicating the bad string location:
188  *
189  * |[<!-- language="C" -->
190  *   g_autoptr(GUri) uri = g_uri_parse ("http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fbad%3D%00alue", G_URI_FLAGS_NONE, &err);
191  *   g_assert_error (err, G_URI_ERROR, G_URI_ERROR_BAD_QUERY);
192  * ]|
193  *
194  * You should pass %G_URI_FLAGS_ENCODED or %G_URI_FLAGS_ENCODED_QUERY if you
195  * need to handle that case manually. In particular, if the query string
196  * contains `=` characters that are `%`-encoded, you should let
197  * g_uri_parse_params() do the decoding once of the query.
198  *
199  * #GUri is immutable once constructed, and can safely be accessed from
200  * multiple threads. Its reference counting is atomic.
201  *
202  * Since: 2.66
203  */
204 struct _GUri {
205   gchar     *scheme;
206   gchar     *userinfo;
207   gchar     *host;
208   gint       port;
209   gchar     *path;
210   gchar     *query;
211   gchar     *fragment;
212 
213   gchar     *user;
214   gchar     *password;
215   gchar     *auth_params;
216 
217   GUriFlags  flags;
218 };
219 
220 /**
221  * g_uri_ref: (skip)
222  * @uri: a #GUri
223  *
224  * Increments the reference count of @uri by one.
225  *
226  * Returns: @uri
227  *
228  * Since: 2.66
229  */
230 GUri *
g_uri_ref(GUri * uri)231 g_uri_ref (GUri *uri)
232 {
233   g_return_val_if_fail (uri != NULL, NULL);
234 
235   return g_atomic_rc_box_acquire (uri);
236 }
237 
238 static void
g_uri_clear(GUri * uri)239 g_uri_clear (GUri *uri)
240 {
241   g_free (uri->scheme);
242   g_free (uri->userinfo);
243   g_free (uri->host);
244   g_free (uri->path);
245   g_free (uri->query);
246   g_free (uri->fragment);
247   g_free (uri->user);
248   g_free (uri->password);
249   g_free (uri->auth_params);
250 }
251 
252 /**
253  * g_uri_unref: (skip)
254  * @uri: a #GUri
255  *
256  * Atomically decrements the reference count of @uri by one.
257  *
258  * When the reference count reaches zero, the resources allocated by
259  * @uri are freed
260  *
261  * Since: 2.66
262  */
263 void
g_uri_unref(GUri * uri)264 g_uri_unref (GUri *uri)
265 {
266   g_return_if_fail (uri != NULL);
267 
268   g_atomic_rc_box_release_full (uri, (GDestroyNotify)g_uri_clear);
269 }
270 
271 static gboolean
g_uri_char_is_unreserved(gchar ch)272 g_uri_char_is_unreserved (gchar ch)
273 {
274   if (g_ascii_isalnum (ch))
275     return TRUE;
276   return ch == '-' || ch == '.' || ch == '_' || ch == '~';
277 }
278 
/* XDIGIT: numeric value of the hexadecimal digit character @c.
 * @c must already be known to be a hex digit, and it is evaluated more than
 * once, so it must not have side effects.
 */
#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)

/* HEXCHAR: value of the "%XX" escape that @s points at, where (s)[0] is the
 * '%' and (s)[1], (s)[2] are validated hex digits. The argument is
 * parenthesized so that expressions such as `p + 1` index correctly; it is
 * evaluated more than once.
 */
#define HEXCHAR(s) ((XDIGIT ((s)[1]) << 4) + XDIGIT ((s)[2]))
281 
282 static gssize
uri_decoder(gchar ** out,const gchar * illegal_chars,const gchar * start,gsize length,gboolean just_normalize,gboolean www_form,GUriFlags flags,GUriError parse_error,GError ** error)283 uri_decoder (gchar       **out,
284              const gchar  *illegal_chars,
285              const gchar  *start,
286              gsize         length,
287              gboolean      just_normalize,
288              gboolean      www_form,
289              GUriFlags     flags,
290              GUriError     parse_error,
291              GError      **error)
292 {
293   gchar c;
294   GString *decoded;
295   const gchar *invalid, *s, *end;
296   gssize len;
297 
298   if (!(flags & G_URI_FLAGS_ENCODED))
299     just_normalize = FALSE;
300 
301   decoded = g_string_sized_new (length + 1);
302   for (s = start, end = s + length; s < end; s++)
303     {
304       if (*s == '%')
305         {
306           if (s + 2 >= end ||
307               !g_ascii_isxdigit (s[1]) ||
308               !g_ascii_isxdigit (s[2]))
309             {
310               /* % followed by non-hex or the end of the string; this is an error */
311               if (!(flags & G_URI_FLAGS_PARSE_RELAXED))
312                 {
313                   g_set_error_literal (error, G_URI_ERROR, parse_error,
314                                        /* xgettext: no-c-format */
315                                        _("Invalid %-encoding in URI"));
316                   g_string_free (decoded, TRUE);
317                   return -1;
318                 }
319 
320               /* In non-strict mode, just let it through; we *don't*
321                * fix it to "%25", since that might change the way that
322                * the URI's owner would interpret it.
323                */
324               g_string_append_c (decoded, *s);
325               continue;
326             }
327 
328           c = HEXCHAR (s);
329           if (illegal_chars && strchr (illegal_chars, c))
330             {
331               g_set_error_literal (error, G_URI_ERROR, parse_error,
332                                    _("Illegal character in URI"));
333               g_string_free (decoded, TRUE);
334               return -1;
335             }
336           if (just_normalize && !g_uri_char_is_unreserved (c))
337             {
338               /* Leave the % sequence there but normalize it. */
339               g_string_append_c (decoded, *s);
340               g_string_append_c (decoded, g_ascii_toupper (s[1]));
341               g_string_append_c (decoded, g_ascii_toupper (s[2]));
342               s += 2;
343             }
344           else
345             {
346               g_string_append_c (decoded, c);
347               s += 2;
348             }
349         }
350       else if (www_form && *s == '+')
351         g_string_append_c (decoded, ' ');
352       /* Normalize any illegal characters. */
353       else if (just_normalize && (!g_ascii_isgraph (*s)))
354         g_string_append_printf (decoded, "%%%02X", (guchar)*s);
355       else
356         g_string_append_c (decoded, *s);
357     }
358 
359   len = decoded->len;
360   g_assert (len >= 0);
361 
362   if (!(flags & G_URI_FLAGS_ENCODED) &&
363       !g_utf8_validate (decoded->str, len, &invalid))
364     {
365       g_set_error_literal (error, G_URI_ERROR, parse_error,
366                            _("Non-UTF-8 characters in URI"));
367       g_string_free (decoded, TRUE);
368       return -1;
369     }
370 
371   if (out)
372     *out = g_string_free (decoded, FALSE);
373   else
374     g_string_free (decoded, TRUE);
375 
376   return len;
377 }
378 
379 static gboolean
uri_decode(gchar ** out,const gchar * illegal_chars,const gchar * start,gsize length,gboolean www_form,GUriFlags flags,GUriError parse_error,GError ** error)380 uri_decode (gchar       **out,
381             const gchar  *illegal_chars,
382             const gchar  *start,
383             gsize         length,
384             gboolean      www_form,
385             GUriFlags     flags,
386             GUriError     parse_error,
387             GError      **error)
388 {
389   return uri_decoder (out, illegal_chars, start, length, FALSE, www_form, flags,
390                       parse_error, error) != -1;
391 }
392 
393 static gboolean
uri_normalize(gchar ** out,const gchar * start,gsize length,GUriFlags flags,GUriError parse_error,GError ** error)394 uri_normalize (gchar       **out,
395                const gchar  *start,
396                gsize         length,
397                GUriFlags     flags,
398                GUriError     parse_error,
399                GError      **error)
400 {
401   return uri_decoder (out, NULL, start, length, TRUE, FALSE, flags,
402                       parse_error, error) != -1;
403 }
404 
405 static gboolean
is_valid(guchar c,const gchar * reserved_chars_allowed)406 is_valid (guchar       c,
407           const gchar *reserved_chars_allowed)
408 {
409   if (g_uri_char_is_unreserved (c))
410     return TRUE;
411 
412   if (reserved_chars_allowed && strchr (reserved_chars_allowed, c))
413     return TRUE;
414 
415   return FALSE;
416 }
417 
418 void
_uri_encoder(GString * out,const guchar * start,gsize length,const gchar * reserved_chars_allowed,gboolean allow_utf8)419 _uri_encoder (GString      *out,
420               const guchar *start,
421               gsize         length,
422               const gchar  *reserved_chars_allowed,
423               gboolean      allow_utf8)
424 {
425   static const gchar hex[16] = "0123456789ABCDEF";
426   const guchar *p = start;
427   const guchar *end = p + length;
428 
429   while (p < end)
430     {
431       gunichar multibyte_utf8_char = 0;
432 
433       if (allow_utf8 && *p >= 0x80)
434         multibyte_utf8_char = g_utf8_get_char_validated ((gchar *)p, end - p);
435 
436       if (multibyte_utf8_char > 0 &&
437           multibyte_utf8_char != (gunichar) -1 && multibyte_utf8_char != (gunichar) -2)
438         {
439           gint len = g_utf8_skip [*p];
440           g_string_append_len (out, (gchar *)p, len);
441           p += len;
442         }
443       else if (is_valid (*p, reserved_chars_allowed))
444         {
445           g_string_append_c (out, *p);
446           p++;
447         }
448       else
449         {
450           g_string_append_c (out, '%');
451           g_string_append_c (out, hex[*p >> 4]);
452           g_string_append_c (out, hex[*p & 0xf]);
453           p++;
454         }
455     }
456 }
457 
458 /* Parse the IP-literal construction from RFC 6874 (which extends RFC 3986 to
459  * support IPv6 zone identifiers.
460  *
461  * Currently, IP versions beyond 6 (i.e. the IPvFuture rule) are unsupported.
462  * There’s no point supporting them until (a) they exist and (b) the rest of the
463  * stack (notably, sockets) supports them.
464  *
465  * Rules:
466  *
467  * IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
468  *
469  * ZoneID = 1*( unreserved / pct-encoded )
470  *
471  * IPv6addrz = IPv6address "%25" ZoneID
472  *
473  * If %G_URI_FLAGS_PARSE_RELAXED is specified, this function also accepts:
474  *
475  * IPv6addrz = IPv6address "%" ZoneID
476  */
477 static gboolean
parse_ip_literal(const gchar * start,gsize length,GUriFlags flags,gchar ** out,GError ** error)478 parse_ip_literal (const gchar  *start,
479                   gsize         length,
480                   GUriFlags     flags,
481                   gchar       **out,
482                   GError      **error)
483 {
484   gchar *pct, *zone_id = NULL;
485   gchar *addr = NULL;
486   gsize addr_length = 0;
487   gsize zone_id_length = 0;
488   gchar *decoded_zone_id = NULL;
489 
490   if (start[length - 1] != ']')
491     goto bad_ipv6_literal;
492 
493   /* Drop the square brackets */
494   addr = g_strndup (start + 1, length - 2);
495   addr_length = length - 2;
496 
497   /* If there's an IPv6 scope ID, split out the zone. */
498   pct = strchr (addr, '%');
499   if (pct != NULL)
500     {
501       *pct = '\0';
502 
503       if (addr_length - (pct - addr) >= 4 &&
504           *(pct + 1) == '2' && *(pct + 2) == '5')
505         {
506           zone_id = pct + 3;
507           zone_id_length = addr_length - (zone_id - addr);
508         }
509       else if (flags & G_URI_FLAGS_PARSE_RELAXED &&
510                addr_length - (pct - addr) >= 2)
511         {
512           zone_id = pct + 1;
513           zone_id_length = addr_length - (zone_id - addr);
514         }
515       else
516         goto bad_ipv6_literal;
517 
518       g_assert (zone_id_length >= 1);
519     }
520 
521   /* addr must be an IPv6 address */
522   if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
523     goto bad_ipv6_literal;
524 
525   /* Zone ID must be valid. It can contain %-encoded characters. */
526   if (zone_id != NULL &&
527       !uri_decode (&decoded_zone_id, NULL, zone_id, zone_id_length, FALSE,
528                    flags, G_URI_ERROR_BAD_HOST, NULL))
529     goto bad_ipv6_literal;
530 
531   /* Success */
532   if (out != NULL && decoded_zone_id != NULL)
533     *out = g_strconcat (addr, "%", decoded_zone_id, NULL);
534   else if (out != NULL)
535     *out = g_steal_pointer (&addr);
536 
537   g_free (addr);
538   g_free (decoded_zone_id);
539 
540   return TRUE;
541 
542 bad_ipv6_literal:
543   g_free (addr);
544   g_free (decoded_zone_id);
545   g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
546                _("Invalid IPv6 address ‘%.*s’ in URI"),
547                (gint)length, start);
548 
549   return FALSE;
550 }
551 
552 static gboolean
parse_host(const gchar * start,gsize length,GUriFlags flags,gchar ** out,GError ** error)553 parse_host (const gchar  *start,
554             gsize         length,
555             GUriFlags     flags,
556             gchar       **out,
557             GError      **error)
558 {
559   gchar *decoded = NULL, *host;
560   gchar *addr = NULL;
561 
562   if (*start == '[')
563     {
564       if (!parse_ip_literal (start, length, flags, &host, error))
565         return FALSE;
566       goto ok;
567     }
568 
569   if (g_ascii_isdigit (*start))
570     {
571       addr = g_strndup (start, length);
572       if (g_hostname_is_ip_address (addr))
573         {
574           host = addr;
575           goto ok;
576         }
577       g_free (addr);
578     }
579 
580   if (flags & G_URI_FLAGS_NON_DNS)
581     {
582       if (!uri_normalize (&decoded, start, length, flags,
583                           G_URI_ERROR_BAD_HOST, error))
584         return FALSE;
585       host = g_steal_pointer (&decoded);
586       goto ok;
587     }
588 
589   flags &= ~G_URI_FLAGS_ENCODED;
590   if (!uri_decode (&decoded, NULL, start, length, FALSE, flags,
591                    G_URI_ERROR_BAD_HOST, error))
592     return FALSE;
593 
594   /* You're not allowed to %-encode an IP address, so if it wasn't
595    * one before, it better not be one now.
596    */
597   if (g_hostname_is_ip_address (decoded))
598     {
599       g_free (decoded);
600       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
601                    _("Illegal encoded IP address ‘%.*s’ in URI"),
602                    (gint)length, start);
603       return FALSE;
604     }
605 
606   if (g_hostname_is_non_ascii (decoded))
607     {
608       host = g_hostname_to_ascii (decoded);
609       if (host == NULL)
610         {
611           g_free (decoded);
612           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
613                        _("Illegal internationalized hostname ‘%.*s’ in URI"),
614                        (gint) length, start);
615           return FALSE;
616         }
617     }
618   else
619     {
620       host = g_steal_pointer (&decoded);
621     }
622 
623  ok:
624   if (out)
625     *out = g_steal_pointer (&host);
626   g_free (host);
627   g_free (decoded);
628 
629   return TRUE;
630 }
631 
632 static gboolean
parse_port(const gchar * start,gsize length,gint * out,GError ** error)633 parse_port (const gchar  *start,
634             gsize         length,
635             gint         *out,
636             GError      **error)
637 {
638   gchar *end;
639   gulong parsed_port;
640 
641   /* strtoul() allows leading + or -, so we have to check this first. */
642   if (!g_ascii_isdigit (*start))
643     {
644       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
645                    _("Could not parse port ‘%.*s’ in URI"),
646                    (gint)length, start);
647       return FALSE;
648     }
649 
650   /* We know that *(start + length) is either '\0' or a non-numeric
651    * character, so strtoul() won't scan beyond it.
652    */
653   parsed_port = strtoul (start, &end, 10);
654   if (end != start + length)
655     {
656       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
657                    _("Could not parse port ‘%.*s’ in URI"),
658                    (gint)length, start);
659       return FALSE;
660     }
661   else if (parsed_port > 65535)
662     {
663       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
664                    _("Port ‘%.*s’ in URI is out of range"),
665                    (gint)length, start);
666       return FALSE;
667     }
668 
669   if (out)
670     *out = parsed_port;
671   return TRUE;
672 }
673 
674 static gboolean
parse_userinfo(const gchar * start,gsize length,GUriFlags flags,gchar ** user,gchar ** password,gchar ** auth_params,GError ** error)675 parse_userinfo (const gchar  *start,
676                 gsize         length,
677                 GUriFlags     flags,
678                 gchar       **user,
679                 gchar       **password,
680                 gchar       **auth_params,
681                 GError      **error)
682 {
683   const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
684 
685   auth_params_end = start + length;
686   if (flags & G_URI_FLAGS_HAS_AUTH_PARAMS)
687     password_end = memchr (start, ';', auth_params_end - start);
688   if (!password_end)
689     password_end = auth_params_end;
690   if (flags & G_URI_FLAGS_HAS_PASSWORD)
691     user_end = memchr (start, ':', password_end - start);
692   if (!user_end)
693     user_end = password_end;
694 
695   if (!uri_normalize (user, start, user_end - start, flags,
696                       G_URI_ERROR_BAD_USER, error))
697     return FALSE;
698 
699   if (*user_end == ':')
700     {
701       start = user_end + 1;
702       if (!uri_normalize (password, start, password_end - start, flags,
703                           G_URI_ERROR_BAD_PASSWORD, error))
704         {
705           if (user)
706             g_clear_pointer (user, g_free);
707           return FALSE;
708         }
709     }
710   else if (password)
711     *password = NULL;
712 
713   if (*password_end == ';')
714     {
715       start = password_end + 1;
716       if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
717                           G_URI_ERROR_BAD_AUTH_PARAMS, error))
718         {
719           if (user)
720             g_clear_pointer (user, g_free);
721           if (password)
722             g_clear_pointer (password, g_free);
723           return FALSE;
724         }
725     }
726   else if (auth_params)
727     *auth_params = NULL;
728 
729   return TRUE;
730 }
731 
732 static gchar *
uri_cleanup(const gchar * uri_string)733 uri_cleanup (const gchar *uri_string)
734 {
735   GString *copy;
736   const gchar *end;
737 
738   /* Skip leading whitespace */
739   while (g_ascii_isspace (*uri_string))
740     uri_string++;
741 
742   /* Ignore trailing whitespace */
743   end = uri_string + strlen (uri_string);
744   while (end > uri_string && g_ascii_isspace (*(end - 1)))
745     end--;
746 
747   /* Copy the rest, encoding unencoded spaces and stripping other whitespace */
748   copy = g_string_sized_new (end - uri_string);
749   while (uri_string < end)
750     {
751       if (*uri_string == ' ')
752         g_string_append (copy, "%20");
753       else if (g_ascii_isspace (*uri_string))
754         ;
755       else
756         g_string_append_c (copy, *uri_string);
757       uri_string++;
758     }
759 
760   return g_string_free (copy, FALSE);
761 }
762 
763 static gboolean
should_normalize_empty_path(const char * scheme)764 should_normalize_empty_path (const char *scheme)
765 {
766   const char * const schemes[] = { "https", "http", "wss", "ws" };
767   gsize i;
768   for (i = 0; i < G_N_ELEMENTS (schemes); ++i)
769     {
770       if (!strcmp (schemes[i], scheme))
771         return TRUE;
772     }
773   return FALSE;
774 }
775 
/* Returns -1 if @port is the default port for @scheme (21 for "ftp", 80 for
 * "http"/"ws", 443 for "https"/"wss"); otherwise returns @port unchanged.
 */
static int
normalize_port (const char *scheme,
                int         port)
{
  int is_default = 0;

  if (port == 21)
    is_default = strcmp (scheme, "ftp") == 0;
  else if (port == 80)
    is_default = strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0;
  else if (port == 443)
    is_default = strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0;

  return is_default ? -1 : port;
}
808 
/* Returns the default port for @scheme (80 for "http"/"ws", 443 for
 * "https"/"wss", 21 for "ftp"), or -1 for any other scheme. The comparison
 * is case-sensitive.
 */
static int
default_scheme_port (const char *scheme)
{
  static const struct
  {
    const char *name;
    int port;
  } known_schemes[] = {
    { "http", 80 },
    { "ws", 80 },
    { "https", 443 },
    { "wss", 443 },
    { "ftp", 21 },
  };
  size_t idx;

  for (idx = 0; idx < sizeof known_schemes / sizeof known_schemes[0]; idx++)
    {
      if (strcmp (scheme, known_schemes[idx].name) == 0)
        return known_schemes[idx].port;
    }

  return -1;
}
823 
824 static gboolean
g_uri_split_internal(const gchar * uri_string,GUriFlags flags,gchar ** scheme,gchar ** userinfo,gchar ** user,gchar ** password,gchar ** auth_params,gchar ** host,gint * port,gchar ** path,gchar ** query,gchar ** fragment,GError ** error)825 g_uri_split_internal (const gchar  *uri_string,
826                       GUriFlags     flags,
827                       gchar       **scheme,
828                       gchar       **userinfo,
829                       gchar       **user,
830                       gchar       **password,
831                       gchar       **auth_params,
832                       gchar       **host,
833                       gint         *port,
834                       gchar       **path,
835                       gchar       **query,
836                       gchar       **fragment,
837                       GError      **error)
838 {
839   const gchar *end, *colon, *at, *path_start, *semi, *question;
840   const gchar *p, *bracket, *hostend;
841   gchar *cleaned_uri_string = NULL;
842   gchar *normalized_scheme = NULL;
843 
844   if (scheme)
845     *scheme = NULL;
846   if (userinfo)
847     *userinfo = NULL;
848   if (user)
849     *user = NULL;
850   if (password)
851     *password = NULL;
852   if (auth_params)
853     *auth_params = NULL;
854   if (host)
855     *host = NULL;
856   if (port)
857     *port = -1;
858   if (path)
859     *path = NULL;
860   if (query)
861     *query = NULL;
862   if (fragment)
863     *fragment = NULL;
864 
865   if ((flags & G_URI_FLAGS_PARSE_RELAXED) && strpbrk (uri_string, " \t\n\r"))
866     {
867       cleaned_uri_string = uri_cleanup (uri_string);
868       uri_string = cleaned_uri_string;
869     }
870 
871   /* Find scheme */
872   p = uri_string;
873   while (*p && (g_ascii_isalpha (*p) ||
874                (p > uri_string && (g_ascii_isdigit (*p) ||
875                                    *p == '.' || *p == '+' || *p == '-'))))
876     p++;
877 
878   if (p > uri_string && *p == ':')
879     {
880       normalized_scheme = g_ascii_strdown (uri_string, p - uri_string);
881       if (scheme)
882         *scheme = g_steal_pointer (&normalized_scheme);
883       p++;
884     }
885   else
886     {
887       if (scheme)
888         *scheme = NULL;
889       p = uri_string;
890     }
891 
892   /* Check for authority */
893   if (strncmp (p, "//", 2) == 0)
894     {
895       p += 2;
896 
897       path_start = p + strcspn (p, "/?#");
898       at = memchr (p, '@', path_start - p);
899       if (at)
900         {
901           if (flags & G_URI_FLAGS_PARSE_RELAXED)
902             {
903               gchar *next_at;
904 
905               /* Any "@"s in the userinfo must be %-encoded, but
906                * people get this wrong sometimes. Since "@"s in the
907                * hostname are unlikely (and also wrong anyway), assume
908                * that if there are extra "@"s, they belong in the
909                * userinfo.
910                */
911               do
912                 {
913                   next_at = memchr (at + 1, '@', path_start - (at + 1));
914                   if (next_at)
915                     at = next_at;
916                 }
917               while (next_at);
918             }
919 
920           if (user || password || auth_params ||
921               (flags & (G_URI_FLAGS_HAS_PASSWORD|G_URI_FLAGS_HAS_AUTH_PARAMS)))
922             {
923               if (!parse_userinfo (p, at - p, flags,
924                                    user, password, auth_params,
925                                    error))
926                 goto fail;
927             }
928 
929           if (!uri_normalize (userinfo, p, at - p, flags,
930                               G_URI_ERROR_BAD_USER, error))
931             goto fail;
932 
933           p = at + 1;
934         }
935 
936       if (flags & G_URI_FLAGS_PARSE_RELAXED)
937         {
938           semi = strchr (p, ';');
939           if (semi && semi < path_start)
940             {
941               /* Technically, semicolons are allowed in the "host"
942                * production, but no one ever does this, and some
943                * schemes mistakenly use semicolon as a delimiter
944                * marking the start of the path. We have to check this
945                * after checking for userinfo though, because a
946                * semicolon before the "@" must be part of the
947                * userinfo.
948                */
949               path_start = semi;
950             }
951         }
952 
953       /* Find host and port. The host may be a bracket-delimited IPv6
954        * address, in which case the colon delimiting the port must come
955        * (immediately) after the close bracket.
956        */
957       if (*p == '[')
958         {
959           bracket = memchr (p, ']', path_start - p);
960           if (bracket && *(bracket + 1) == ':')
961             colon = bracket + 1;
962           else
963             colon = NULL;
964         }
965       else
966         colon = memchr (p, ':', path_start - p);
967 
968       hostend = colon ? colon : path_start;
969       if (!parse_host (p, hostend - p, flags, host, error))
970         goto fail;
971 
972       if (colon && colon != path_start - 1)
973         {
974           p = colon + 1;
975           if (!parse_port (p, path_start - p, port, error))
976             goto fail;
977         }
978 
979       p = path_start;
980     }
981 
982   /* Find fragment. */
983   end = p + strcspn (p, "#");
984   if (*end == '#')
985     {
986       if (!uri_normalize (fragment, end + 1, strlen (end + 1),
987                           flags | (flags & G_URI_FLAGS_ENCODED_FRAGMENT ? G_URI_FLAGS_ENCODED : 0),
988                           G_URI_ERROR_BAD_FRAGMENT, error))
989         goto fail;
990     }
991 
992   /* Find query */
993   question = memchr (p, '?', end - p);
994   if (question)
995     {
996       if (!uri_normalize (query, question + 1, end - (question + 1),
997                           flags | (flags & G_URI_FLAGS_ENCODED_QUERY ? G_URI_FLAGS_ENCODED : 0),
998                           G_URI_ERROR_BAD_QUERY, error))
999         goto fail;
1000       end = question;
1001     }
1002 
1003   if (!uri_normalize (path, p, end - p,
1004                       flags | (flags & G_URI_FLAGS_ENCODED_PATH ? G_URI_FLAGS_ENCODED : 0),
1005                       G_URI_ERROR_BAD_PATH, error))
1006     goto fail;
1007 
1008   /* Scheme-based normalization */
1009   if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && ((scheme && *scheme) || normalized_scheme))
1010     {
1011       const char *scheme_str = scheme && *scheme ? *scheme : normalized_scheme;
1012 
1013       if (should_normalize_empty_path (scheme_str) && path && !**path)
1014         {
1015           g_free (*path);
1016           *path = g_strdup ("/");
1017         }
1018 
1019       if (port && *port == -1)
1020         *port = default_scheme_port (scheme_str);
1021     }
1022 
1023   g_free (normalized_scheme);
1024   g_free (cleaned_uri_string);
1025   return TRUE;
1026 
1027  fail:
1028   if (scheme)
1029     g_clear_pointer (scheme, g_free);
1030   if (userinfo)
1031     g_clear_pointer (userinfo, g_free);
1032   if (host)
1033     g_clear_pointer (host, g_free);
1034   if (port)
1035     *port = -1;
1036   if (path)
1037     g_clear_pointer (path, g_free);
1038   if (query)
1039     g_clear_pointer (query, g_free);
1040   if (fragment)
1041     g_clear_pointer (fragment, g_free);
1042 
1043   g_free (normalized_scheme);
1044   g_free (cleaned_uri_string);
1045   return FALSE;
1046 }
1047 
1048 /**
1049  * g_uri_split:
1050  * @uri_ref: a string containing a relative or absolute URI
1051  * @flags: flags for parsing @uri_ref
1052  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1053  *    the scheme (converted to lowercase), or %NULL
1054  * @userinfo: (out) (nullable) (optional) (transfer full): on return, contains
1055  *    the userinfo, or %NULL
1056  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1057  *    host, or %NULL
1058  * @port: (out) (optional) (transfer full): on return, contains the
1059  *    port, or `-1`
1060  * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1061  *    path
1062  * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1063  *    query, or %NULL
1064  * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1065  *    the fragment, or %NULL
1066  * @error: #GError for error reporting, or %NULL to ignore.
1067  *
1068  * Parses @uri_ref (which can be an
1069  * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1070  * returns the pieces. Any component that doesn't appear in @uri_ref will be
1071  * returned as %NULL (but note that all URIs always have a path component,
1072  * though it may be the empty string).
1073  *
1074  * If @flags contains %G_URI_FLAGS_ENCODED, then `%`-encoded characters in
1075  * @uri_ref will remain encoded in the output strings. (If not,
1076  * then all such characters will be decoded.) Note that decoding will
1077  * only work if the URI components are ASCII or UTF-8, so you will
1078  * need to use %G_URI_FLAGS_ENCODED if they are not.
1079  *
1080  * Note that the %G_URI_FLAGS_HAS_PASSWORD and
1081  * %G_URI_FLAGS_HAS_AUTH_PARAMS @flags are ignored by g_uri_split(),
1082  * since it always returns only the full userinfo; use
1083  * g_uri_split_with_user() if you want it split up.
1084  *
1085  * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1086  *   on error.
1087  *
1088  * Since: 2.66
1089  */
1090 gboolean
g_uri_split(const gchar * uri_ref,GUriFlags flags,gchar ** scheme,gchar ** userinfo,gchar ** host,gint * port,gchar ** path,gchar ** query,gchar ** fragment,GError ** error)1091 g_uri_split (const gchar  *uri_ref,
1092              GUriFlags     flags,
1093              gchar       **scheme,
1094              gchar       **userinfo,
1095              gchar       **host,
1096              gint         *port,
1097              gchar       **path,
1098              gchar       **query,
1099              gchar       **fragment,
1100              GError      **error)
1101 {
1102   g_return_val_if_fail (uri_ref != NULL, FALSE);
1103   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1104 
1105   return g_uri_split_internal (uri_ref, flags,
1106                                scheme, userinfo, NULL, NULL, NULL,
1107                                host, port, path, query, fragment,
1108                                error);
1109 }
1110 
1111 /**
1112  * g_uri_split_with_user:
1113  * @uri_ref: a string containing a relative or absolute URI
1114  * @flags: flags for parsing @uri_ref
1115  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1116  *    the scheme (converted to lowercase), or %NULL
1117  * @user: (out) (nullable) (optional) (transfer full): on return, contains
1118  *    the user, or %NULL
1119  * @password: (out) (nullable) (optional) (transfer full): on return, contains
1120  *    the password, or %NULL
1121  * @auth_params: (out) (nullable) (optional) (transfer full): on return, contains
1122  *    the auth_params, or %NULL
1123  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1124  *    host, or %NULL
1125  * @port: (out) (optional) (transfer full): on return, contains the
1126  *    port, or `-1`
1127  * @path: (out) (not nullable) (optional) (transfer full): on return, contains the
1128  *    path
1129  * @query: (out) (nullable) (optional) (transfer full): on return, contains the
1130  *    query, or %NULL
1131  * @fragment: (out) (nullable) (optional) (transfer full): on return, contains
1132  *    the fragment, or %NULL
1133  * @error: #GError for error reporting, or %NULL to ignore.
1134  *
1135  * Parses @uri_ref (which can be an
1136  * [absolute or relative URI][relative-absolute-uris]) according to @flags, and
1137  * returns the pieces. Any component that doesn't appear in @uri_ref will be
1138  * returned as %NULL (but note that all URIs always have a path component,
1139  * though it may be the empty string).
1140  *
1141  * See g_uri_split(), and the definition of #GUriFlags, for more
1142  * information on the effect of @flags. Note that @password will only
1143  * be parsed out if @flags contains %G_URI_FLAGS_HAS_PASSWORD, and
1144  * @auth_params will only be parsed out if @flags contains
1145  * %G_URI_FLAGS_HAS_AUTH_PARAMS.
1146  *
1147  * Returns: (skip): %TRUE if @uri_ref parsed successfully, %FALSE
1148  *   on error.
1149  *
1150  * Since: 2.66
1151  */
1152 gboolean
g_uri_split_with_user(const gchar * uri_ref,GUriFlags flags,gchar ** scheme,gchar ** user,gchar ** password,gchar ** auth_params,gchar ** host,gint * port,gchar ** path,gchar ** query,gchar ** fragment,GError ** error)1153 g_uri_split_with_user (const gchar  *uri_ref,
1154                        GUriFlags     flags,
1155                        gchar       **scheme,
1156                        gchar       **user,
1157                        gchar       **password,
1158                        gchar       **auth_params,
1159                        gchar       **host,
1160                        gint         *port,
1161                        gchar       **path,
1162                        gchar       **query,
1163                        gchar       **fragment,
1164                        GError      **error)
1165 {
1166   g_return_val_if_fail (uri_ref != NULL, FALSE);
1167   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1168 
1169   return g_uri_split_internal (uri_ref, flags,
1170                                scheme, NULL, user, password, auth_params,
1171                                host, port, path, query, fragment,
1172                                error);
1173 }
1174 
1175 
1176 /**
1177  * g_uri_split_network:
1178  * @uri_string: a string containing an absolute URI
1179  * @flags: flags for parsing @uri_string
1180  * @scheme: (out) (nullable) (optional) (transfer full): on return, contains
1181  *    the scheme (converted to lowercase), or %NULL
1182  * @host: (out) (nullable) (optional) (transfer full): on return, contains the
1183  *    host, or %NULL
1184  * @port: (out) (optional) (transfer full): on return, contains the
1185  *    port, or `-1`
1186  * @error: #GError for error reporting, or %NULL to ignore.
1187  *
1188  * Parses @uri_string (which must be an [absolute URI][relative-absolute-uris])
1189  * according to @flags, and returns the pieces relevant to connecting to a host.
1190  * See the documentation for g_uri_split() for more details; this is
1191  * mostly a wrapper around that function with simpler arguments.
1192  * However, it will return an error if @uri_string is a relative URI,
1193  * or does not contain a hostname component.
1194  *
1195  * Returns: (skip): %TRUE if @uri_string parsed successfully,
1196  *   %FALSE on error.
1197  *
1198  * Since: 2.66
1199  */
1200 gboolean
g_uri_split_network(const gchar * uri_string,GUriFlags flags,gchar ** scheme,gchar ** host,gint * port,GError ** error)1201 g_uri_split_network (const gchar  *uri_string,
1202                      GUriFlags     flags,
1203                      gchar       **scheme,
1204                      gchar       **host,
1205                      gint         *port,
1206                      GError      **error)
1207 {
1208   gchar *my_scheme = NULL, *my_host = NULL;
1209 
1210   g_return_val_if_fail (uri_string != NULL, FALSE);
1211   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1212 
1213   if (!g_uri_split_internal (uri_string, flags,
1214                              &my_scheme, NULL, NULL, NULL, NULL,
1215                              &my_host, port, NULL, NULL, NULL,
1216                              error))
1217     return FALSE;
1218 
1219   if (!my_scheme || !my_host)
1220     {
1221       if (!my_scheme)
1222         {
1223           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1224                        _("URI ‘%s’ is not an absolute URI"),
1225                        uri_string);
1226         }
1227       else
1228         {
1229           g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
1230                        _("URI ‘%s’ has no host component"),
1231                        uri_string);
1232         }
1233       g_free (my_scheme);
1234       g_free (my_host);
1235 
1236       return FALSE;
1237     }
1238 
1239   if (scheme)
1240     *scheme = g_steal_pointer (&my_scheme);
1241   if (host)
1242     *host = g_steal_pointer (&my_host);
1243 
1244   g_free (my_scheme);
1245   g_free (my_host);
1246 
1247   return TRUE;
1248 }
1249 
1250 /**
1251  * g_uri_is_valid:
1252  * @uri_string: a string containing an absolute URI
1253  * @flags: flags for parsing @uri_string
1254  * @error: #GError for error reporting, or %NULL to ignore.
1255  *
1256  * Parses @uri_string according to @flags, to determine whether it is a valid
1257  * [absolute URI][relative-absolute-uris], i.e. it does not need to be resolved
1258  * relative to another URI using g_uri_parse_relative().
1259  *
1260  * If it’s not a valid URI, an error is returned explaining how it’s invalid.
1261  *
1262  * See g_uri_split(), and the definition of #GUriFlags, for more
1263  * information on the effect of @flags.
1264  *
1265  * Returns: %TRUE if @uri_string is a valid absolute URI, %FALSE on error.
1266  *
1267  * Since: 2.66
1268  */
1269 gboolean
g_uri_is_valid(const gchar * uri_string,GUriFlags flags,GError ** error)1270 g_uri_is_valid (const gchar  *uri_string,
1271                 GUriFlags     flags,
1272                 GError      **error)
1273 {
1274   gchar *my_scheme = NULL;
1275 
1276   g_return_val_if_fail (uri_string != NULL, FALSE);
1277   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
1278 
1279   if (!g_uri_split_internal (uri_string, flags,
1280                              &my_scheme, NULL, NULL, NULL, NULL,
1281                              NULL, NULL, NULL, NULL, NULL,
1282                              error))
1283     return FALSE;
1284 
1285   if (!my_scheme)
1286     {
1287       g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
1288                    _("URI ‘%s’ is not an absolute URI"),
1289                    uri_string);
1290       return FALSE;
1291     }
1292 
1293   g_free (my_scheme);
1294 
1295   return TRUE;
1296 }
1297 
1298 
/* Steps @output back over the last segment written to the buffer that starts
 * at @path, together with its preceding "/" (if any), and returns the new
 * write position. If nothing has been written yet, @output is returned
 * unchanged.
 */
static gchar *
drop_last_segment (gchar *path,
                   gchar *output)
{
  if (output == path)
    return output;

  do
    output--;
  while (*output != '/' && output > path);

  return output;
}

/* Implements the "Remove Dot Segments" algorithm from section 5.2.4 of
 * RFC 3986, rewriting @path in place.
 *
 * See https://tools.ietf.org/html/rfc3986#section-5.2.4
 */
static void
remove_dot_segments (gchar *path)
{
  /* Reading and writing share one buffer: the write cursor only advances
   * together with the read cursor, and the read cursor never moves
   * backwards, so unread input is never overwritten. */
  gchar *src = path;
  gchar *dst = path;

  /* Nothing to do for an empty path; in particular, do not write to it */
  if (*path == '\0')
    return;

  while (*src != '\0')
    {
      /*  A.  If the input buffer begins with a prefix of "../" or "./",
       *      then remove that prefix from the input buffer.
       */
      if (strncmp (src, "../", 3) == 0)
        {
          src += 3;
          continue;
        }
      if (strncmp (src, "./", 2) == 0)
        {
          src += 2;
          continue;
        }

      /*  B.  If the input buffer begins with a prefix of "/./" or "/.",
       *      where "." is a complete path segment, then replace that
       *      prefix with "/" in the input buffer.
       */
      if (strncmp (src, "/./", 3) == 0)
        {
          src += 2;
          continue;
        }
      if (strcmp (src, "/.") == 0)
        {
          src[1] = '\0';
          continue;
        }

      /*  C.  If the input buffer begins with a prefix of "/../" or "/..",
       *      where ".." is a complete path segment, then replace that
       *      prefix with "/" in the input buffer and remove the last
       *      segment and its preceding "/" (if any) from the output
       *      buffer.
       */
      if (strncmp (src, "/../", 4) == 0)
        {
          src += 3;
          dst = drop_last_segment (path, dst);
          continue;
        }
      if (strcmp (src, "/..") == 0)
        {
          src[1] = '\0';
          dst = drop_last_segment (path, dst);
          continue;
        }

      /*  D.  If the input buffer consists only of "." or "..", then remove
       *      that from the input buffer.
       */
      if (strcmp (src, "..") == 0 || strcmp (src, ".") == 0)
        {
          src[0] = '\0';
          continue;
        }

      /*  E.  Otherwise, move the first path segment in the input buffer to
       *      the end of the output buffer, including the initial "/"
       *      character (if any) and any subsequent characters up to, but
       *      not including, the next "/" character or the end of the input
       *      buffer.
       */
      do
        *dst++ = *src++;
      while (*src != '\0' && *src != '/');
    }

  *dst = '\0';
}
1387 
1388 /**
1389  * g_uri_parse:
1390  * @uri_string: a string representing an absolute URI
1391  * @flags: flags describing how to parse @uri_string
1392  * @error: #GError for error reporting, or %NULL to ignore.
1393  *
1394  * Parses @uri_string according to @flags. If the result is not a
1395  * valid [absolute URI][relative-absolute-uris], it will be discarded, and an
1396  * error returned.
1397  *
1398  * Return value: (transfer full): a new #GUri, or NULL on error.
1399  *
1400  * Since: 2.66
1401  */
1402 GUri *
g_uri_parse(const gchar * uri_string,GUriFlags flags,GError ** error)1403 g_uri_parse (const gchar  *uri_string,
1404              GUriFlags     flags,
1405              GError      **error)
1406 {
1407   g_return_val_if_fail (uri_string != NULL, NULL);
1408   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1409 
1410   return g_uri_parse_relative (NULL, uri_string, flags, error);
1411 }
1412 
1413 /**
1414  * g_uri_parse_relative:
1415  * @base_uri: (nullable) (transfer none): a base absolute URI
1416  * @uri_ref: a string representing a relative or absolute URI
1417  * @flags: flags describing how to parse @uri_ref
1418  * @error: #GError for error reporting, or %NULL to ignore.
1419  *
1420  * Parses @uri_ref according to @flags and, if it is a
1421  * [relative URI][relative-absolute-uris], resolves it relative to @base_uri.
1422  * If the result is not a valid absolute URI, it will be discarded, and an error
1423  * returned.
1424  *
1425  * Return value: (transfer full): a new #GUri, or NULL on error.
1426  *
1427  * Since: 2.66
1428  */
1429 GUri *
g_uri_parse_relative(GUri * base_uri,const gchar * uri_ref,GUriFlags flags,GError ** error)1430 g_uri_parse_relative (GUri         *base_uri,
1431                       const gchar  *uri_ref,
1432                       GUriFlags     flags,
1433                       GError      **error)
1434 {
1435   GUri *uri = NULL;
1436 
1437   g_return_val_if_fail (uri_ref != NULL, NULL);
1438   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1439   g_return_val_if_fail (base_uri == NULL || base_uri->scheme != NULL, NULL);
1440 
1441   /* Use GUri struct to construct the return value: there is no guarantee it is
1442    * actually correct within the function body. */
1443   uri = g_atomic_rc_box_new0 (GUri);
1444   uri->flags = flags;
1445 
1446   if (!g_uri_split_internal (uri_ref, flags,
1447                              &uri->scheme, &uri->userinfo,
1448                              &uri->user, &uri->password, &uri->auth_params,
1449                              &uri->host, &uri->port,
1450                              &uri->path, &uri->query, &uri->fragment,
1451                              error))
1452     {
1453       g_uri_unref (uri);
1454       return NULL;
1455     }
1456 
1457   if (!uri->scheme && !base_uri)
1458     {
1459       g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
1460                            _("URI is not absolute, and no base URI was provided"));
1461       g_uri_unref (uri);
1462       return NULL;
1463     }
1464 
1465   if (base_uri)
1466     {
1467       /* This is section 5.2.2 of RFC 3986, except that we're doing
1468        * it in place in @uri rather than copying from R to T.
1469        *
1470        * See https://tools.ietf.org/html/rfc3986#section-5.2.2
1471        */
1472       if (uri->scheme)
1473         remove_dot_segments (uri->path);
1474       else
1475         {
1476           uri->scheme = g_strdup (base_uri->scheme);
1477           if (uri->host)
1478             remove_dot_segments (uri->path);
1479           else
1480             {
1481               if (!*uri->path)
1482                 {
1483                   g_free (uri->path);
1484                   uri->path = g_strdup (base_uri->path);
1485                   if (!uri->query)
1486                     uri->query = g_strdup (base_uri->query);
1487                 }
1488               else
1489                 {
1490                   if (*uri->path == '/')
1491                     remove_dot_segments (uri->path);
1492                   else
1493                     {
1494                       gchar *newpath, *last;
1495 
1496                       last = strrchr (base_uri->path, '/');
1497                       if (last)
1498                         {
1499                           newpath = g_strdup_printf ("%.*s/%s",
1500                                                      (gint)(last - base_uri->path),
1501                                                      base_uri->path,
1502                                                      uri->path);
1503                         }
1504                       else
1505                         newpath = g_strdup_printf ("/%s", uri->path);
1506 
1507                       g_free (uri->path);
1508                       uri->path = g_steal_pointer (&newpath);
1509 
1510                       remove_dot_segments (uri->path);
1511                     }
1512                 }
1513 
1514               uri->userinfo = g_strdup (base_uri->userinfo);
1515               uri->user = g_strdup (base_uri->user);
1516               uri->password = g_strdup (base_uri->password);
1517               uri->auth_params = g_strdup (base_uri->auth_params);
1518               uri->host = g_strdup (base_uri->host);
1519               uri->port = base_uri->port;
1520             }
1521         }
1522 
1523       /* Scheme normalization couldn't have been done earlier
1524        * as the relative URI may not have had a scheme */
1525       if (flags & G_URI_FLAGS_SCHEME_NORMALIZE)
1526         {
1527           if (should_normalize_empty_path (uri->scheme) && !*uri->path)
1528             {
1529               g_free (uri->path);
1530               uri->path = g_strdup ("/");
1531             }
1532 
1533           uri->port = normalize_port (uri->scheme, uri->port);
1534         }
1535     }
1536   else
1537     {
1538       remove_dot_segments (uri->path);
1539     }
1540 
1541   return g_steal_pointer (&uri);
1542 }
1543 
1544 /**
1545  * g_uri_resolve_relative:
1546  * @base_uri_string: (nullable): a string representing a base URI
1547  * @uri_ref: a string representing a relative or absolute URI
1548  * @flags: flags describing how to parse @uri_ref
1549  * @error: #GError for error reporting, or %NULL to ignore.
1550  *
1551  * Parses @uri_ref according to @flags and, if it is a
1552  * [relative URI][relative-absolute-uris], resolves it relative to
1553  * @base_uri_string. If the result is not a valid absolute URI, it will be
1554  * discarded, and an error returned.
1555  *
1556  * (If @base_uri_string is %NULL, this just returns @uri_ref, or
1557  * %NULL if @uri_ref is invalid or not absolute.)
1558  *
1559  * Return value: (transfer full): the resolved URI string,
1560  * or NULL on error.
1561  *
1562  * Since: 2.66
1563  */
1564 gchar *
g_uri_resolve_relative(const gchar * base_uri_string,const gchar * uri_ref,GUriFlags flags,GError ** error)1565 g_uri_resolve_relative (const gchar  *base_uri_string,
1566                         const gchar  *uri_ref,
1567                         GUriFlags     flags,
1568                         GError      **error)
1569 {
1570   GUri *base_uri, *resolved_uri;
1571   gchar *resolved_uri_string;
1572 
1573   g_return_val_if_fail (uri_ref != NULL, NULL);
1574   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
1575 
1576   flags |= G_URI_FLAGS_ENCODED;
1577 
1578   if (base_uri_string)
1579     {
1580       base_uri = g_uri_parse (base_uri_string, flags, error);
1581       if (!base_uri)
1582         return NULL;
1583     }
1584   else
1585     base_uri = NULL;
1586 
1587   resolved_uri = g_uri_parse_relative (base_uri, uri_ref, flags, error);
1588   if (base_uri)
1589     g_uri_unref (base_uri);
1590   if (!resolved_uri)
1591     return NULL;
1592 
1593   resolved_uri_string = g_uri_to_string (resolved_uri);
1594   g_uri_unref (resolved_uri);
1595   return g_steal_pointer (&resolved_uri_string);
1596 }
1597 
1598 /* userinfo as a whole can contain sub-delims + ":", but split-out
1599  * user can't contain ":" or ";", and split-out password can't contain
1600  * ";".
1601  */
1602 #define USERINFO_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
1603 #define USER_ALLOWED_CHARS "!$&'()*+,="
1604 #define PASSWORD_ALLOWED_CHARS "!$&'()*+,=:"
1605 #define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
1606 #define IP_ADDR_ALLOWED_CHARS ":"
1607 #define HOST_ALLOWED_CHARS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
1608 #define PATH_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
1609 #define QUERY_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1610 #define FRAGMENT_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
1611 
1612 static gchar *
g_uri_join_internal(GUriFlags flags,const gchar * scheme,gboolean userinfo,const gchar * user,const gchar * password,const gchar * auth_params,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1613 g_uri_join_internal (GUriFlags    flags,
1614                      const gchar *scheme,
1615                      gboolean     userinfo,
1616                      const gchar *user,
1617                      const gchar *password,
1618                      const gchar *auth_params,
1619                      const gchar *host,
1620                      gint         port,
1621                      const gchar *path,
1622                      const gchar *query,
1623                      const gchar *fragment)
1624 {
1625   gboolean encoded = (flags & G_URI_FLAGS_ENCODED);
1626   GString *str;
1627   char *normalized_scheme = NULL;
1628 
1629   /* Restrictions on path prefixes. See:
1630    * https://tools.ietf.org/html/rfc3986#section-3
1631    */
1632   g_return_val_if_fail (path != NULL, NULL);
1633   g_return_val_if_fail (host == NULL || (path[0] == '\0' || path[0] == '/'), NULL);
1634   g_return_val_if_fail (host != NULL || (path[0] != '/' || path[1] != '/'), NULL);
1635 
1636   str = g_string_new (scheme);
1637   if (scheme)
1638     g_string_append_c (str, ':');
1639 
1640   if (flags & G_URI_FLAGS_SCHEME_NORMALIZE && scheme && ((host && port != -1) || path[0] == '\0'))
1641     normalized_scheme = g_ascii_strdown (scheme, -1);
1642 
1643   if (host)
1644     {
1645       g_string_append (str, "//");
1646 
1647       if (user)
1648         {
1649           if (encoded)
1650             g_string_append (str, user);
1651           else
1652             {
1653               if (userinfo)
1654                 g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
1655               else
1656                 /* Encode ':' and ';' regardless of whether we have a
1657                  * password or auth params, since it may be parsed later
1658                  * under the assumption that it does.
1659                  */
1660                 g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
1661             }
1662 
1663           if (password)
1664             {
1665               g_string_append_c (str, ':');
1666               if (encoded)
1667                 g_string_append (str, password);
1668               else
1669                 g_string_append_uri_escaped (str, password,
1670                                              PASSWORD_ALLOWED_CHARS, TRUE);
1671             }
1672 
1673           if (auth_params)
1674             {
1675               g_string_append_c (str, ';');
1676               if (encoded)
1677                 g_string_append (str, auth_params);
1678               else
1679                 g_string_append_uri_escaped (str, auth_params,
1680                                              AUTH_PARAMS_ALLOWED_CHARS, TRUE);
1681             }
1682 
1683           g_string_append_c (str, '@');
1684         }
1685 
1686       if (strchr (host, ':') && g_hostname_is_ip_address (host))
1687         {
1688           g_string_append_c (str, '[');
1689           if (encoded)
1690             g_string_append (str, host);
1691           else
1692             g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
1693           g_string_append_c (str, ']');
1694         }
1695       else
1696         {
1697           if (encoded)
1698             g_string_append (str, host);
1699           else
1700             g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
1701         }
1702 
1703       if (port != -1 && (!normalized_scheme || normalize_port (normalized_scheme, port) != -1))
1704         g_string_append_printf (str, ":%d", port);
1705     }
1706 
1707   if (path[0] == '\0' && normalized_scheme && should_normalize_empty_path (normalized_scheme))
1708     g_string_append (str, "/");
1709   else if (encoded || flags & G_URI_FLAGS_ENCODED_PATH)
1710     g_string_append (str, path);
1711   else
1712     g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
1713 
1714   g_free (normalized_scheme);
1715 
1716   if (query)
1717     {
1718       g_string_append_c (str, '?');
1719       if (encoded || flags & G_URI_FLAGS_ENCODED_QUERY)
1720         g_string_append (str, query);
1721       else
1722         g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
1723     }
1724   if (fragment)
1725     {
1726       g_string_append_c (str, '#');
1727       if (encoded || flags & G_URI_FLAGS_ENCODED_FRAGMENT)
1728         g_string_append (str, fragment);
1729       else
1730         g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
1731     }
1732 
1733   return g_string_free (str, FALSE);
1734 }
1735 
1736 /**
1737  * g_uri_join:
1738  * @flags: flags describing how to build the URI string
1739  * @scheme: (nullable): the URI scheme, or %NULL
1740  * @userinfo: (nullable): the userinfo component, or %NULL
1741  * @host: (nullable): the host component, or %NULL
1742  * @port: the port, or `-1`
1743  * @path: (not nullable): the path component
1744  * @query: (nullable): the query component, or %NULL
1745  * @fragment: (nullable): the fragment, or %NULL
1746  *
1747  * Joins the given components together according to @flags to create
1748  * an absolute URI string. @path may not be %NULL (though it may be the empty
1749  * string).
1750  *
1751  * When @host is present, @path must either be empty or begin with a slash (`/`)
1752  * character. When @host is not present, @path cannot begin with two slash
 * characters (`//`). See
1754  * [RFC 3986, section 3](https://tools.ietf.org/html/rfc3986#section-3).
1755  *
1756  * See also g_uri_join_with_user(), which allows specifying the
1757  * components of the ‘userinfo’ separately.
1758  *
1759  * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1760  * in @flags.
1761  *
1762  * Return value: (not nullable) (transfer full): an absolute URI string
1763  *
1764  * Since: 2.66
1765  */
1766 gchar *
g_uri_join(GUriFlags flags,const gchar * scheme,const gchar * userinfo,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1767 g_uri_join (GUriFlags    flags,
1768             const gchar *scheme,
1769             const gchar *userinfo,
1770             const gchar *host,
1771             gint         port,
1772             const gchar *path,
1773             const gchar *query,
1774             const gchar *fragment)
1775 {
1776   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1777   g_return_val_if_fail (path != NULL, NULL);
1778 
1779   return g_uri_join_internal (flags,
1780                               scheme,
1781                               TRUE, userinfo, NULL, NULL,
1782                               host,
1783                               port,
1784                               path,
1785                               query,
1786                               fragment);
1787 }
1788 
1789 /**
1790  * g_uri_join_with_user:
1791  * @flags: flags describing how to build the URI string
1792  * @scheme: (nullable): the URI scheme, or %NULL
1793  * @user: (nullable): the user component of the userinfo, or %NULL
1794  * @password: (nullable): the password component of the userinfo, or
1795  *   %NULL
1796  * @auth_params: (nullable): the auth params of the userinfo, or
1797  *   %NULL
1798  * @host: (nullable): the host component, or %NULL
1799  * @port: the port, or `-1`
1800  * @path: (not nullable): the path component
1801  * @query: (nullable): the query component, or %NULL
1802  * @fragment: (nullable): the fragment, or %NULL
1803  *
1804  * Joins the given components together according to @flags to create
1805  * an absolute URI string. @path may not be %NULL (though it may be the empty
1806  * string).
1807  *
1808  * In contrast to g_uri_join(), this allows specifying the components
1809  * of the ‘userinfo’ separately. It otherwise behaves the same.
1810  *
1811  * %G_URI_FLAGS_HAS_PASSWORD and %G_URI_FLAGS_HAS_AUTH_PARAMS are ignored if set
1812  * in @flags.
1813  *
1814  * Return value: (not nullable) (transfer full): an absolute URI string
1815  *
1816  * Since: 2.66
1817  */
1818 gchar *
g_uri_join_with_user(GUriFlags flags,const gchar * scheme,const gchar * user,const gchar * password,const gchar * auth_params,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1819 g_uri_join_with_user (GUriFlags    flags,
1820                       const gchar *scheme,
1821                       const gchar *user,
1822                       const gchar *password,
1823                       const gchar *auth_params,
1824                       const gchar *host,
1825                       gint         port,
1826                       const gchar *path,
1827                       const gchar *query,
1828                       const gchar *fragment)
1829 {
1830   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1831   g_return_val_if_fail (path != NULL, NULL);
1832 
1833   return g_uri_join_internal (flags,
1834                               scheme,
1835                               FALSE, user, password, auth_params,
1836                               host,
1837                               port,
1838                               path,
1839                               query,
1840                               fragment);
1841 }
1842 
1843 /**
1844  * g_uri_build:
1845  * @flags: flags describing how to build the #GUri
1846  * @scheme: (not nullable): the URI scheme
1847  * @userinfo: (nullable): the userinfo component, or %NULL
1848  * @host: (nullable): the host component, or %NULL
1849  * @port: the port, or `-1`
1850  * @path: (not nullable): the path component
1851  * @query: (nullable): the query component, or %NULL
1852  * @fragment: (nullable): the fragment, or %NULL
1853  *
1854  * Creates a new #GUri from the given components according to @flags.
1855  *
1856  * See also g_uri_build_with_user(), which allows specifying the
1857  * components of the "userinfo" separately.
1858  *
1859  * Return value: (not nullable) (transfer full): a new #GUri
1860  *
1861  * Since: 2.66
1862  */
1863 GUri *
g_uri_build(GUriFlags flags,const gchar * scheme,const gchar * userinfo,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1864 g_uri_build (GUriFlags    flags,
1865              const gchar *scheme,
1866              const gchar *userinfo,
1867              const gchar *host,
1868              gint         port,
1869              const gchar *path,
1870              const gchar *query,
1871              const gchar *fragment)
1872 {
1873   GUri *uri;
1874 
1875   g_return_val_if_fail (scheme != NULL, NULL);
1876   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1877   g_return_val_if_fail (path != NULL, NULL);
1878 
1879   uri = g_atomic_rc_box_new0 (GUri);
1880   uri->flags = flags;
1881   uri->scheme = g_ascii_strdown (scheme, -1);
1882   uri->userinfo = g_strdup (userinfo);
1883   uri->host = g_strdup (host);
1884   uri->port = port;
1885   uri->path = g_strdup (path);
1886   uri->query = g_strdup (query);
1887   uri->fragment = g_strdup (fragment);
1888 
1889   return g_steal_pointer (&uri);
1890 }
1891 
1892 /**
1893  * g_uri_build_with_user:
1894  * @flags: flags describing how to build the #GUri
1895  * @scheme: (not nullable): the URI scheme
1896  * @user: (nullable): the user component of the userinfo, or %NULL
1897  * @password: (nullable): the password component of the userinfo, or %NULL
1898  * @auth_params: (nullable): the auth params of the userinfo, or %NULL
1899  * @host: (nullable): the host component, or %NULL
1900  * @port: the port, or `-1`
1901  * @path: (not nullable): the path component
1902  * @query: (nullable): the query component, or %NULL
1903  * @fragment: (nullable): the fragment, or %NULL
1904  *
1905  * Creates a new #GUri from the given components according to @flags
1906  * (%G_URI_FLAGS_HAS_PASSWORD is added unconditionally). The @flags must be
1907  * coherent with the passed values, in particular use `%`-encoded values with
1908  * %G_URI_FLAGS_ENCODED.
1909  *
1910  * In contrast to g_uri_build(), this allows specifying the components
1911  * of the ‘userinfo’ field separately. Note that @user must be non-%NULL
1912  * if either @password or @auth_params is non-%NULL.
1913  *
1914  * Return value: (not nullable) (transfer full): a new #GUri
1915  *
1916  * Since: 2.66
1917  */
1918 GUri *
g_uri_build_with_user(GUriFlags flags,const gchar * scheme,const gchar * user,const gchar * password,const gchar * auth_params,const gchar * host,gint port,const gchar * path,const gchar * query,const gchar * fragment)1919 g_uri_build_with_user (GUriFlags    flags,
1920                        const gchar *scheme,
1921                        const gchar *user,
1922                        const gchar *password,
1923                        const gchar *auth_params,
1924                        const gchar *host,
1925                        gint         port,
1926                        const gchar *path,
1927                        const gchar *query,
1928                        const gchar *fragment)
1929 {
1930   GUri *uri;
1931   GString *userinfo;
1932 
1933   g_return_val_if_fail (scheme != NULL, NULL);
1934   g_return_val_if_fail (password == NULL || user != NULL, NULL);
1935   g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
1936   g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
1937   g_return_val_if_fail (path != NULL, NULL);
1938 
1939   uri = g_atomic_rc_box_new0 (GUri);
1940   uri->flags = flags | G_URI_FLAGS_HAS_PASSWORD;
1941   uri->scheme = g_ascii_strdown (scheme, -1);
1942   uri->user = g_strdup (user);
1943   uri->password = g_strdup (password);
1944   uri->auth_params = g_strdup (auth_params);
1945   uri->host = g_strdup (host);
1946   uri->port = port;
1947   uri->path = g_strdup (path);
1948   uri->query = g_strdup (query);
1949   uri->fragment = g_strdup (fragment);
1950 
1951   if (user)
1952     {
1953       userinfo = g_string_new (user);
1954       if (password)
1955         {
1956           g_string_append_c (userinfo, ':');
1957           g_string_append (userinfo, uri->password);
1958         }
1959       if (auth_params)
1960         {
1961           g_string_append_c (userinfo, ';');
1962           g_string_append (userinfo, uri->auth_params);
1963         }
1964       uri->userinfo = g_string_free (userinfo, FALSE);
1965     }
1966 
1967   return g_steal_pointer (&uri);
1968 }
1969 
1970 /**
1971  * g_uri_to_string:
1972  * @uri: a #GUri
1973  *
1974  * Returns a string representing @uri.
1975  *
1976  * This is not guaranteed to return a string which is identical to the
1977  * string that @uri was parsed from. However, if the source URI was
1978  * syntactically correct (according to RFC 3986), and it was parsed
1979  * with %G_URI_FLAGS_ENCODED, then g_uri_to_string() is guaranteed to return
1980  * a string which is at least semantically equivalent to the source
1981  * URI (according to RFC 3986).
1982  *
1983  * If @uri might contain sensitive details, such as authentication parameters,
1984  * or private data in its query string, and the returned string is going to be
1985  * logged, then consider using g_uri_to_string_partial() to redact parts.
1986  *
1987  * Return value: (not nullable) (transfer full): a string representing @uri,
1988  *     which the caller must free.
1989  *
1990  * Since: 2.66
1991  */
1992 gchar *
g_uri_to_string(GUri * uri)1993 g_uri_to_string (GUri *uri)
1994 {
1995   g_return_val_if_fail (uri != NULL, NULL);
1996 
1997   return g_uri_to_string_partial (uri, G_URI_HIDE_NONE);
1998 }
1999 
2000 /**
2001  * g_uri_to_string_partial:
2002  * @uri: a #GUri
2003  * @flags: flags describing what parts of @uri to hide
2004  *
2005  * Returns a string representing @uri, subject to the options in
2006  * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
2007  *
2008  * Return value: (not nullable) (transfer full): a string representing
2009  *     @uri, which the caller must free.
2010  *
2011  * Since: 2.66
2012  */
2013 gchar *
g_uri_to_string_partial(GUri * uri,GUriHideFlags flags)2014 g_uri_to_string_partial (GUri          *uri,
2015                          GUriHideFlags  flags)
2016 {
2017   gboolean hide_user = (flags & G_URI_HIDE_USERINFO);
2018   gboolean hide_password = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD));
2019   gboolean hide_auth_params = (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS));
2020   gboolean hide_query = (flags & G_URI_HIDE_QUERY);
2021   gboolean hide_fragment = (flags & G_URI_HIDE_FRAGMENT);
2022 
2023   g_return_val_if_fail (uri != NULL, NULL);
2024 
2025   if (uri->flags & (G_URI_FLAGS_HAS_PASSWORD | G_URI_FLAGS_HAS_AUTH_PARAMS))
2026     {
2027       return g_uri_join_with_user (uri->flags,
2028                                    uri->scheme,
2029                                    hide_user ? NULL : uri->user,
2030                                    hide_password ? NULL : uri->password,
2031                                    hide_auth_params ? NULL : uri->auth_params,
2032                                    uri->host,
2033                                    uri->port,
2034                                    uri->path,
2035                                    hide_query ? NULL : uri->query,
2036                                    hide_fragment ? NULL : uri->fragment);
2037     }
2038 
2039   return g_uri_join (uri->flags,
2040                      uri->scheme,
2041                      hide_user ? NULL : uri->userinfo,
2042                      uri->host,
2043                      uri->port,
2044                      uri->path,
2045                      hide_query ? NULL : uri->query,
2046                      hide_fragment ? NULL : uri->fragment);
2047 }
2048 
2049 /* This is just a copy of g_str_hash() with g_ascii_toupper() added */
2050 static guint
str_ascii_case_hash(gconstpointer v)2051 str_ascii_case_hash (gconstpointer v)
2052 {
2053   const signed char *p;
2054   guint32 h = 5381;
2055 
2056   for (p = v; *p != '\0'; p++)
2057     h = (h << 5) + h + g_ascii_toupper (*p);
2058 
2059   return h;
2060 }
2061 
2062 static gboolean
str_ascii_case_equal(gconstpointer v1,gconstpointer v2)2063 str_ascii_case_equal (gconstpointer v1,
2064                       gconstpointer v2)
2065 {
2066   const gchar *string1 = v1;
2067   const gchar *string2 = v2;
2068 
2069   return g_ascii_strcasecmp (string1, string2) == 0;
2070 }
2071 
2072 /**
2073  * GUriParamsIter:
2074  *
2075  * Many URI schemes include one or more attribute/value pairs as part of the URI
2076  * value. For example `scheme://server/path?query=string&is=there` has two
2077  * attributes – `query=string` and `is=there` – in its query part.
2078  *
2079  * A #GUriParamsIter structure represents an iterator that can be used to
2080  * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
2081  * structures are typically allocated on the stack and then initialized with
2082  * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
2083  * for a usage example.
2084  *
2085  * Since: 2.66
2086  */
/* Private view of a GUriParamsIter: g_uri_params_iter_init() and
 * g_uri_params_iter_next() cast the caller's GUriParamsIter pointer to this
 * type and keep all of their state in these fields. */
typedef struct
{
  GUriParamsFlags flags;          /* flags given to g_uri_params_iter_init() */
  const gchar    *attr;           /* where the next attribute/value pair starts */
  const gchar    *end;            /* end of the params string; iteration stops here */
  guint8          sep_table[256]; /* 1 = index is a separator; 0 otherwise */
} RealIter;

/* Because RealIter is overlaid on caller-provided GUriParamsIter storage, the
 * two types must have the same size and RealIter must not need stricter
 * alignment than GUriParamsIter. */
G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
2097 
2098 /**
2099  * g_uri_params_iter_init:
2100  * @iter: an uninitialized #GUriParamsIter
2101  * @params: a `%`-encoded string containing `attribute=value`
2102  *   parameters
2103  * @length: the length of @params, or `-1` if it is nul-terminated
2104  * @separators: the separator byte character set between parameters. (usually
2105  *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2106  *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2107  *   anything but ASCII characters. You may pass an empty set, in which case
2108  *   no splitting will occur.
2109  * @flags: flags to modify the way the parameters are handled.
2110  *
2111  * Initializes an attribute/value pair iterator.
2112  *
 * The iterator keeps pointers to the @params and @separators arguments, so
 * those variables must outlive the iterator and must not be modified during
 * the iteration.
2116  *
2117  * If %G_URI_PARAMS_WWW_FORM is passed in @flags, `+` characters in the param
2118  * string will be replaced with spaces in the output. For example, `foo=bar+baz`
2119  * will give attribute `foo` with value `bar baz`. This is commonly used on the
2120  * web (the `https` and `http` schemes only), but is deprecated in favour of
2121  * the equivalent of encoding spaces as `%20`.
2122  *
2123  * Unlike with g_uri_parse_params(), %G_URI_PARAMS_CASE_INSENSITIVE has no
2124  * effect if passed to @flags for g_uri_params_iter_init(). The caller is
2125  * responsible for doing their own case-insensitive comparisons.
2126  *
2127  * |[<!-- language="C" -->
2128  * GUriParamsIter iter;
2129  * GError *error = NULL;
2130  * gchar *unowned_attr, *unowned_value;
2131  *
2132  * g_uri_params_iter_init (&iter, "foo=bar&baz=bar&Foo=frob&baz=bar2", -1, "&", G_URI_PARAMS_NONE);
2133  * while (g_uri_params_iter_next (&iter, &unowned_attr, &unowned_value, &error))
2134  *   {
2135  *     g_autofree gchar *attr = g_steal_pointer (&unowned_attr);
2136  *     g_autofree gchar *value = g_steal_pointer (&unowned_value);
2137  *     // do something with attr and value; this code will be called 4 times
2138  *     // for the params string in this example: once with attr=foo and value=bar,
2139  *     // then with baz/bar, then Foo/frob, then baz/bar2.
2140  *   }
2141  * if (error)
2142  *   // handle parsing error
2143  * ]|
2144  *
2145  * Since: 2.66
2146  */
2147 void
g_uri_params_iter_init(GUriParamsIter * iter,const gchar * params,gssize length,const gchar * separators,GUriParamsFlags flags)2148 g_uri_params_iter_init (GUriParamsIter *iter,
2149                         const gchar    *params,
2150                         gssize          length,
2151                         const gchar    *separators,
2152                         GUriParamsFlags flags)
2153 {
2154   RealIter *ri = (RealIter *)iter;
2155   const gchar *s;
2156 
2157   g_return_if_fail (iter != NULL);
2158   g_return_if_fail (length == 0 || params != NULL);
2159   g_return_if_fail (length >= -1);
2160   g_return_if_fail (separators != NULL);
2161 
2162   ri->flags = flags;
2163 
2164   if (length == -1)
2165     ri->end = params + strlen (params);
2166   else
2167     ri->end = params + length;
2168 
2169   memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
2170   for (s = separators; *s != '\0'; ++s)
2171     ri->sep_table[*(guchar *)s] = TRUE;
2172 
2173   ri->attr = params;
2174 }
2175 
2176 /**
2177  * g_uri_params_iter_next:
2178  * @iter: an initialized #GUriParamsIter
2179  * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
2180  *     the attribute, or %NULL.
2181  * @value: (out) (nullable) (optional) (transfer full): on return, contains
2182  *     the value, or %NULL.
2183  * @error: #GError for error reporting, or %NULL to ignore.
2184  *
2185  * Advances @iter and retrieves the next attribute/value. %FALSE is returned if
2186  * an error has occurred (in which case @error is set), or if the end of the
2187  * iteration is reached (in which case @attribute and @value are set to %NULL
2188  * and the iterator becomes invalid). If %TRUE is returned,
2189  * g_uri_params_iter_next() may be called again to receive another
2190  * attribute/value pair.
2191  *
2192  * Note that the same @attribute may be returned multiple times, since URIs
2193  * allow repeated attributes.
2194  *
2195  * Returns: %FALSE if the end of the parameters has been reached or an error was
2196  *     encountered. %TRUE otherwise.
2197  *
2198  * Since: 2.66
2199  */
2200 gboolean
g_uri_params_iter_next(GUriParamsIter * iter,gchar ** attribute,gchar ** value,GError ** error)2201 g_uri_params_iter_next (GUriParamsIter *iter,
2202                         gchar         **attribute,
2203                         gchar         **value,
2204                         GError        **error)
2205 {
2206   RealIter *ri = (RealIter *)iter;
2207   const gchar *attr_end, *val, *val_end;
2208   gchar *decoded_attr, *decoded_value;
2209   gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
2210   GUriFlags decode_flags = G_URI_FLAGS_NONE;
2211 
2212   g_return_val_if_fail (iter != NULL, FALSE);
2213   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2214 
2215   /* Pre-clear these in case of failure or finishing. */
2216   if (attribute)
2217     *attribute = NULL;
2218   if (value)
2219     *value = NULL;
2220 
2221   if (ri->attr >= ri->end)
2222     return FALSE;
2223 
2224   if (ri->flags & G_URI_PARAMS_PARSE_RELAXED)
2225     decode_flags |= G_URI_FLAGS_PARSE_RELAXED;
2226 
2227   /* Check if each character in @attr is a separator, by indexing by the
2228    * character value into the @sep_table, which has value 1 stored at an
2229    * index if that index is a separator. */
2230   for (val_end = ri->attr; val_end < ri->end; val_end++)
2231     if (ri->sep_table[*(guchar *)val_end])
2232       break;
2233 
2234   attr_end = memchr (ri->attr, '=', val_end - ri->attr);
2235   if (!attr_end)
2236     {
2237       g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_FAILED,
2238                            _("Missing ‘=’ and parameter value"));
2239       return FALSE;
2240     }
2241   if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
2242                    www_form, decode_flags, G_URI_ERROR_FAILED, error))
2243     {
2244       return FALSE;
2245     }
2246 
2247   val = attr_end + 1;
2248   if (!uri_decode (&decoded_value, NULL, val, val_end - val,
2249                    www_form, decode_flags, G_URI_ERROR_FAILED, error))
2250     {
2251       g_free (decoded_attr);
2252       return FALSE;
2253     }
2254 
2255   if (attribute)
2256     *attribute = g_steal_pointer (&decoded_attr);
2257   if (value)
2258     *value = g_steal_pointer (&decoded_value);
2259 
2260   g_free (decoded_attr);
2261   g_free (decoded_value);
2262 
2263   ri->attr = val_end + 1;
2264   return TRUE;
2265 }
2266 
2267 /**
2268  * g_uri_parse_params:
2269  * @params: a `%`-encoded string containing `attribute=value`
2270  *   parameters
2271  * @length: the length of @params, or `-1` if it is nul-terminated
2272  * @separators: the separator byte character set between parameters. (usually
2273  *   `&`, but sometimes `;` or both `&;`). Note that this function works on
2274  *   bytes not characters, so it can't be used to delimit UTF-8 strings for
2275  *   anything but ASCII characters. You may pass an empty set, in which case
2276  *   no splitting will occur.
2277  * @flags: flags to modify the way the parameters are handled.
2278  * @error: #GError for error reporting, or %NULL to ignore.
2279  *
2280  * Many URI schemes include one or more attribute/value pairs as part of the URI
2281  * value. This method can be used to parse them into a hash table. When an
2282  * attribute has multiple occurrences, the last value is the final returned
2283  * value. If you need to handle repeated attributes differently, use
2284  * #GUriParamsIter.
2285  *
2286  * The @params string is assumed to still be `%`-encoded, but the returned
2287  * values will be fully decoded. (Thus it is possible that the returned values
2288  * may contain `=` or @separators, if the value was encoded in the input.)
2289  * Invalid `%`-encoding is treated as with the %G_URI_FLAGS_PARSE_RELAXED
2290  * rules for g_uri_parse(). (However, if @params is the path or query string
2291  * from a #GUri that was parsed without %G_URI_FLAGS_PARSE_RELAXED and
2292  * %G_URI_FLAGS_ENCODED, then you already know that it does not contain any
2293  * invalid encoding.)
2294  *
2295  * %G_URI_PARAMS_WWW_FORM is handled as documented for g_uri_params_iter_init().
2296  *
2297  * If %G_URI_PARAMS_CASE_INSENSITIVE is passed to @flags, attributes will be
2298  * compared case-insensitively, so a params string `attr=123&Attr=456` will only
2299  * return a single attribute–value pair, `Attr=456`. Case will be preserved in
2300  * the returned attributes.
2301  *
2302  * If @params cannot be parsed (for example, it contains two @separators
2303  * characters in a row), then @error is set and %NULL is returned.
2304  *
2305  * Return value: (transfer full) (element-type utf8 utf8):
2306  *     A hash table of attribute/value pairs, with both names and values
2307  *     fully-decoded; or %NULL on error.
2308  *
2309  * Since: 2.66
2310  */
2311 GHashTable *
g_uri_parse_params(const gchar * params,gssize length,const gchar * separators,GUriParamsFlags flags,GError ** error)2312 g_uri_parse_params (const gchar     *params,
2313                     gssize           length,
2314                     const gchar     *separators,
2315                     GUriParamsFlags  flags,
2316                     GError         **error)
2317 {
2318   GHashTable *hash;
2319   GUriParamsIter iter;
2320   gchar *attribute, *value;
2321   GError *err = NULL;
2322 
2323   g_return_val_if_fail (length == 0 || params != NULL, NULL);
2324   g_return_val_if_fail (length >= -1, NULL);
2325   g_return_val_if_fail (separators != NULL, NULL);
2326   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
2327 
2328   if (flags & G_URI_PARAMS_CASE_INSENSITIVE)
2329     {
2330       hash = g_hash_table_new_full (str_ascii_case_hash,
2331                                     str_ascii_case_equal,
2332                                     g_free, g_free);
2333     }
2334   else
2335     {
2336       hash = g_hash_table_new_full (g_str_hash, g_str_equal,
2337                                     g_free, g_free);
2338     }
2339 
2340   g_uri_params_iter_init (&iter, params, length, separators, flags);
2341 
2342   while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
2343     g_hash_table_insert (hash, attribute, value);
2344 
2345   if (err)
2346     {
2347       g_propagate_error (error, g_steal_pointer (&err));
2348       g_hash_table_destroy (hash);
2349       return NULL;
2350     }
2351 
2352   return g_steal_pointer (&hash);
2353 }
2354 
2355 /**
2356  * g_uri_get_scheme:
2357  * @uri: a #GUri
2358  *
2359  * Gets @uri's scheme. Note that this will always be all-lowercase,
2360  * regardless of the string or strings that @uri was created from.
2361  *
2362  * Return value: (not nullable): @uri's scheme.
2363  *
2364  * Since: 2.66
2365  */
2366 const gchar *
g_uri_get_scheme(GUri * uri)2367 g_uri_get_scheme (GUri *uri)
2368 {
2369   g_return_val_if_fail (uri != NULL, NULL);
2370 
2371   return uri->scheme;
2372 }
2373 
2374 /**
2375  * g_uri_get_userinfo:
2376  * @uri: a #GUri
2377  *
2378  * Gets @uri's userinfo, which may contain `%`-encoding, depending on
2379  * the flags with which @uri was created.
2380  *
2381  * Return value: (nullable): @uri's userinfo.
2382  *
2383  * Since: 2.66
2384  */
2385 const gchar *
g_uri_get_userinfo(GUri * uri)2386 g_uri_get_userinfo (GUri *uri)
2387 {
2388   g_return_val_if_fail (uri != NULL, NULL);
2389 
2390   return uri->userinfo;
2391 }
2392 
2393 /**
2394  * g_uri_get_user:
2395  * @uri: a #GUri
2396  *
2397  * Gets the ‘username’ component of @uri's userinfo, which may contain
2398  * `%`-encoding, depending on the flags with which @uri was created.
2399  * If @uri was not created with %G_URI_FLAGS_HAS_PASSWORD or
2400  * %G_URI_FLAGS_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
2401  *
2402  * Return value: (nullable): @uri's user.
2403  *
2404  * Since: 2.66
2405  */
2406 const gchar *
g_uri_get_user(GUri * uri)2407 g_uri_get_user (GUri *uri)
2408 {
2409   g_return_val_if_fail (uri != NULL, NULL);
2410 
2411   return uri->user;
2412 }
2413 
2414 /**
2415  * g_uri_get_password:
2416  * @uri: a #GUri
2417  *
2418  * Gets @uri's password, which may contain `%`-encoding, depending on
2419  * the flags with which @uri was created. (If @uri was not created
2420  * with %G_URI_FLAGS_HAS_PASSWORD then this will be %NULL.)
2421  *
2422  * Return value: (nullable): @uri's password.
2423  *
2424  * Since: 2.66
2425  */
2426 const gchar *
g_uri_get_password(GUri * uri)2427 g_uri_get_password (GUri *uri)
2428 {
2429   g_return_val_if_fail (uri != NULL, NULL);
2430 
2431   return uri->password;
2432 }
2433 
2434 /**
2435  * g_uri_get_auth_params:
2436  * @uri: a #GUri
2437  *
2438  * Gets @uri's authentication parameters, which may contain
2439  * `%`-encoding, depending on the flags with which @uri was created.
2440  * (If @uri was not created with %G_URI_FLAGS_HAS_AUTH_PARAMS then this will
2441  * be %NULL.)
2442  *
2443  * Depending on the URI scheme, g_uri_parse_params() may be useful for
2444  * further parsing this information.
2445  *
2446  * Return value: (nullable): @uri's authentication parameters.
2447  *
2448  * Since: 2.66
2449  */
2450 const gchar *
g_uri_get_auth_params(GUri * uri)2451 g_uri_get_auth_params (GUri *uri)
2452 {
2453   g_return_val_if_fail (uri != NULL, NULL);
2454 
2455   return uri->auth_params;
2456 }
2457 
2458 /**
2459  * g_uri_get_host:
2460  * @uri: a #GUri
2461  *
2462  * Gets @uri's host. This will never have `%`-encoded characters,
2463  * unless it is non-UTF-8 (which can only be the case if @uri was
2464  * created with %G_URI_FLAGS_NON_DNS).
2465  *
2466  * If @uri contained an IPv6 address literal, this value will be just
2467  * that address, without the brackets around it that are necessary in
2468  * the string form of the URI. Note that in this case there may also
2469  * be a scope ID attached to the address. Eg, `fe80::1234%``em1` (or
2470  * `fe80::1234%``25em1` if the string is still encoded).
2471  *
2472  * Return value: (nullable): @uri's host.
2473  *
2474  * Since: 2.66
2475  */
2476 const gchar *
g_uri_get_host(GUri * uri)2477 g_uri_get_host (GUri *uri)
2478 {
2479   g_return_val_if_fail (uri != NULL, NULL);
2480 
2481   return uri->host;
2482 }
2483 
2484 /**
2485  * g_uri_get_port:
2486  * @uri: a #GUri
2487  *
2488  * Gets @uri's port.
2489  *
2490  * Return value: @uri's port, or `-1` if no port was specified.
2491  *
2492  * Since: 2.66
2493  */
2494 gint
g_uri_get_port(GUri * uri)2495 g_uri_get_port (GUri *uri)
2496 {
2497   g_return_val_if_fail (uri != NULL, -1);
2498 
2499   if (uri->port == -1 && uri->flags & G_URI_FLAGS_SCHEME_NORMALIZE)
2500     return default_scheme_port (uri->scheme);
2501 
2502   return uri->port;
2503 }
2504 
2505 /**
2506  * g_uri_get_path:
2507  * @uri: a #GUri
2508  *
2509  * Gets @uri's path, which may contain `%`-encoding, depending on the
2510  * flags with which @uri was created.
2511  *
2512  * Return value: (not nullable): @uri's path.
2513  *
2514  * Since: 2.66
2515  */
2516 const gchar *
g_uri_get_path(GUri * uri)2517 g_uri_get_path (GUri *uri)
2518 {
2519   g_return_val_if_fail (uri != NULL, NULL);
2520 
2521   return uri->path;
2522 }
2523 
2524 /**
2525  * g_uri_get_query:
2526  * @uri: a #GUri
2527  *
2528  * Gets @uri's query, which may contain `%`-encoding, depending on the
2529  * flags with which @uri was created.
2530  *
2531  * For queries consisting of a series of `name=value` parameters,
2532  * #GUriParamsIter or g_uri_parse_params() may be useful.
2533  *
2534  * Return value: (nullable): @uri's query.
2535  *
2536  * Since: 2.66
2537  */
2538 const gchar *
g_uri_get_query(GUri * uri)2539 g_uri_get_query (GUri *uri)
2540 {
2541   g_return_val_if_fail (uri != NULL, NULL);
2542 
2543   return uri->query;
2544 }
2545 
2546 /**
2547  * g_uri_get_fragment:
2548  * @uri: a #GUri
2549  *
2550  * Gets @uri's fragment, which may contain `%`-encoding, depending on
2551  * the flags with which @uri was created.
2552  *
2553  * Return value: (nullable): @uri's fragment.
2554  *
2555  * Since: 2.66
2556  */
2557 const gchar *
g_uri_get_fragment(GUri * uri)2558 g_uri_get_fragment (GUri *uri)
2559 {
2560   g_return_val_if_fail (uri != NULL, NULL);
2561 
2562   return uri->fragment;
2563 }
2564 
2565 
2566 /**
2567  * g_uri_get_flags:
2568  * @uri: a #GUri
2569  *
2570  * Gets @uri's flags set upon construction.
2571  *
2572  * Return value: @uri's flags.
2573  *
2574  * Since: 2.66
2575  **/
2576 GUriFlags
g_uri_get_flags(GUri * uri)2577 g_uri_get_flags (GUri *uri)
2578 {
2579   g_return_val_if_fail (uri != NULL, G_URI_FLAGS_NONE);
2580 
2581   return uri->flags;
2582 }
2583 
2584 /**
2585  * g_uri_unescape_segment:
2586  * @escaped_string: (nullable): A string, may be %NULL
2587  * @escaped_string_end: (nullable): Pointer to end of @escaped_string,
2588  *   may be %NULL
2589  * @illegal_characters: (nullable): An optional string of illegal
2590  *   characters not to be allowed, may be %NULL
2591  *
2592  * Unescapes a segment of an escaped string.
2593  *
2594  * If any of the characters in @illegal_characters or the NUL
2595  * character appears as an escaped character in @escaped_string, then
2596  * that is an error and %NULL will be returned. This is useful if you
2597  * want to avoid for instance having a slash being expanded in an
2598  * escaped path element, which might confuse pathname handling.
2599  *
2600  * Note: `NUL` byte is not accepted in the output, in contrast to
2601  * g_uri_unescape_bytes().
2602  *
2603  * Returns: (nullable): an unescaped version of @escaped_string,
2604  * or %NULL on error. The returned string should be freed when no longer
2605  * needed.  As a special case if %NULL is given for @escaped_string, this
2606  * function will return %NULL.
2607  *
2608  * Since: 2.16
2609  **/
2610 gchar *
g_uri_unescape_segment(const gchar * escaped_string,const gchar * escaped_string_end,const gchar * illegal_characters)2611 g_uri_unescape_segment (const gchar *escaped_string,
2612                         const gchar *escaped_string_end,
2613                         const gchar *illegal_characters)
2614 {
2615   gchar *unescaped;
2616   gsize length;
2617   gssize decoded_len;
2618 
2619   if (!escaped_string)
2620     return NULL;
2621 
2622   if (escaped_string_end)
2623     length = escaped_string_end - escaped_string;
2624   else
2625     length = strlen (escaped_string);
2626 
2627   decoded_len = uri_decoder (&unescaped,
2628                              illegal_characters,
2629                              escaped_string, length,
2630                              FALSE, FALSE,
2631                              G_URI_FLAGS_ENCODED,
2632                              0, NULL);
2633   if (decoded_len < 0)
2634     return NULL;
2635 
2636   if (memchr (unescaped, '\0', decoded_len))
2637     {
2638       g_free (unescaped);
2639       return NULL;
2640     }
2641 
2642   return unescaped;
2643 }
2644 
2645 /**
2646  * g_uri_unescape_string:
2647  * @escaped_string: an escaped string to be unescaped.
2648  * @illegal_characters: (nullable): a string of illegal characters
2649  *   not to be allowed, or %NULL.
2650  *
2651  * Unescapes a whole escaped string.
2652  *
2653  * If any of the characters in @illegal_characters or the NUL
2654  * character appears as an escaped character in @escaped_string, then
2655  * that is an error and %NULL will be returned. This is useful if you
2656  * want to avoid for instance having a slash being expanded in an
2657  * escaped path element, which might confuse pathname handling.
2658  *
2659  * Returns: (nullable): an unescaped version of @escaped_string.
2660  * The returned string should be freed when no longer needed.
2661  *
2662  * Since: 2.16
2663  **/
2664 gchar *
g_uri_unescape_string(const gchar * escaped_string,const gchar * illegal_characters)2665 g_uri_unescape_string (const gchar *escaped_string,
2666                        const gchar *illegal_characters)
2667 {
2668   return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
2669 }
2670 
2671 /**
2672  * g_uri_escape_string:
2673  * @unescaped: the unescaped input string.
2674  * @reserved_chars_allowed: (nullable): a string of reserved
2675  *   characters that are allowed to be used, or %NULL.
2676  * @allow_utf8: %TRUE if the result can include UTF-8 characters.
2677  *
2678  * Escapes a string for use in a URI.
2679  *
2680  * Normally all characters that are not "unreserved" (i.e. ASCII
2681  * alphanumerical characters plus dash, dot, underscore and tilde) are
2682  * escaped. But if you specify characters in @reserved_chars_allowed
2683  * they are not escaped. This is useful for the "reserved" characters
2684  * in the URI specification, since those are allowed unescaped in some
2685  * portions of a URI.
2686  *
2687  * Returns: (not nullable): an escaped version of @unescaped. The
2688  * returned string should be freed when no longer needed.
2689  *
2690  * Since: 2.16
2691  **/
2692 gchar *
g_uri_escape_string(const gchar * unescaped,const gchar * reserved_chars_allowed,gboolean allow_utf8)2693 g_uri_escape_string (const gchar *unescaped,
2694                      const gchar *reserved_chars_allowed,
2695                      gboolean     allow_utf8)
2696 {
2697   GString *s;
2698 
2699   g_return_val_if_fail (unescaped != NULL, NULL);
2700 
2701   s = g_string_sized_new (strlen (unescaped) * 1.25);
2702 
2703   g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
2704 
2705   return g_string_free (s, FALSE);
2706 }
2707 
2708 /**
2709  * g_uri_unescape_bytes:
2710  * @escaped_string: A URI-escaped string
2711  * @length: the length (in bytes) of @escaped_string to escape, or `-1` if it
2712  *   is nul-terminated.
2713  * @illegal_characters: (nullable): a string of illegal characters
2714  *   not to be allowed, or %NULL.
2715  * @error: #GError for error reporting, or %NULL to ignore.
2716  *
2717  * Unescapes a segment of an escaped string as binary data.
2718  *
2719  * Note that in contrast to g_uri_unescape_string(), this does allow
2720  * nul bytes to appear in the output.
2721  *
2722  * If any of the characters in @illegal_characters appears as an escaped
2723  * character in @escaped_string, then that is an error and %NULL will be
2724  * returned. This is useful if you want to avoid for instance having a slash
2725  * being expanded in an escaped path element, which might confuse pathname
2726  * handling.
2727  *
2728  * Returns: (transfer full): an unescaped version of @escaped_string
2729  *     or %NULL on error (if decoding failed, using %G_URI_ERROR_FAILED error
2730  *     code). The returned #GBytes should be unreffed when no longer needed.
2731  *
2732  * Since: 2.66
2733  **/
2734 GBytes *
g_uri_unescape_bytes(const gchar * escaped_string,gssize length,const char * illegal_characters,GError ** error)2735 g_uri_unescape_bytes (const gchar *escaped_string,
2736                       gssize       length,
2737                       const char *illegal_characters,
2738                       GError     **error)
2739 {
2740   gchar *buf;
2741   gssize unescaped_length;
2742 
2743   g_return_val_if_fail (escaped_string != NULL, NULL);
2744   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
2745 
2746   if (length == -1)
2747     length = strlen (escaped_string);
2748 
2749   unescaped_length = uri_decoder (&buf,
2750                                   illegal_characters,
2751                                   escaped_string, length,
2752                                   FALSE,
2753                                   FALSE,
2754                                   G_URI_FLAGS_ENCODED,
2755                                   G_URI_ERROR_FAILED, error);
2756   if (unescaped_length == -1)
2757     return NULL;
2758 
2759   return g_bytes_new_take (buf, unescaped_length);
2760 }
2761 
2762 /**
2763  * g_uri_escape_bytes:
2764  * @unescaped: (array length=length): the unescaped input data.
2765  * @length: the length of @unescaped
2766  * @reserved_chars_allowed: (nullable): a string of reserved
2767  *   characters that are allowed to be used, or %NULL.
2768  *
2769  * Escapes arbitrary data for use in a URI.
2770  *
2771  * Normally all characters that are not ‘unreserved’ (i.e. ASCII
2772  * alphanumerical characters plus dash, dot, underscore and tilde) are
2773  * escaped. But if you specify characters in @reserved_chars_allowed
2774  * they are not escaped. This is useful for the ‘reserved’ characters
2775  * in the URI specification, since those are allowed unescaped in some
2776  * portions of a URI.
2777  *
2778  * Though technically incorrect, this will also allow escaping nul
2779  * bytes as `%``00`.
2780  *
2781  * Returns: (not nullable) (transfer full): an escaped version of @unescaped.
2782  *     The returned string should be freed when no longer needed.
2783  *
2784  * Since: 2.66
2785  */
2786 gchar *
g_uri_escape_bytes(const guint8 * unescaped,gsize length,const gchar * reserved_chars_allowed)2787 g_uri_escape_bytes (const guint8 *unescaped,
2788                     gsize         length,
2789                     const gchar  *reserved_chars_allowed)
2790 {
2791   GString *string;
2792 
2793   g_return_val_if_fail (unescaped != NULL, NULL);
2794 
2795   string = g_string_sized_new (length * 1.25);
2796 
2797   _uri_encoder (string, unescaped, length,
2798                reserved_chars_allowed, FALSE);
2799 
2800   return g_string_free (string, FALSE);
2801 }
2802 
2803 static gssize
g_uri_scheme_length(const gchar * uri)2804 g_uri_scheme_length (const gchar *uri)
2805 {
2806   const gchar *p;
2807 
2808   p = uri;
2809   if (!g_ascii_isalpha (*p))
2810     return -1;
2811   p++;
2812   while (g_ascii_isalnum (*p) || *p == '.' || *p == '+' || *p == '-')
2813     p++;
2814 
2815   if (p > uri && *p == ':')
2816     return p - uri;
2817 
2818   return -1;
2819 }
2820 
2821 /**
2822  * g_uri_parse_scheme:
2823  * @uri: a valid URI.
2824  *
2825  * Gets the scheme portion of a URI string.
2826  * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2827  * as:
2828  * |[
2829  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2830  * ]|
2831  * Common schemes include `file`, `https`, `svn+ssh`, etc.
2832  *
2833  * Returns: (transfer full) (nullable): The ‘scheme’ component of the URI, or
2834  *     %NULL on error. The returned string should be freed when no longer needed.
2835  *
2836  * Since: 2.16
2837  **/
2838 gchar *
g_uri_parse_scheme(const gchar * uri)2839 g_uri_parse_scheme (const gchar *uri)
2840 {
2841   gssize len;
2842 
2843   g_return_val_if_fail (uri != NULL, NULL);
2844 
2845   len = g_uri_scheme_length (uri);
2846   return len == -1 ? NULL : g_strndup (uri, len);
2847 }
2848 
2849 /**
2850  * g_uri_peek_scheme:
2851  * @uri: a valid URI.
2852  *
2853  * Gets the scheme portion of a URI string.
2854  * [RFC 3986](https://tools.ietf.org/html/rfc3986#section-3) decodes the scheme
2855  * as:
2856  * |[
2857  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
2858  * ]|
2859  * Common schemes include `file`, `https`, `svn+ssh`, etc.
2860  *
2861  * Unlike g_uri_parse_scheme(), the returned scheme is normalized to
2862  * all-lowercase and does not need to be freed.
2863  *
2864  * Returns: (transfer none) (nullable): The ‘scheme’ component of the URI, or
2865  *     %NULL on error. The returned string is normalized to all-lowercase, and
2866  *     interned via g_intern_string(), so it does not need to be freed.
2867  *
2868  * Since: 2.66
2869  **/
2870 const gchar *
g_uri_peek_scheme(const gchar * uri)2871 g_uri_peek_scheme (const gchar *uri)
2872 {
2873   gssize len;
2874   gchar *lower_scheme;
2875   const gchar *scheme;
2876 
2877   g_return_val_if_fail (uri != NULL, NULL);
2878 
2879   len = g_uri_scheme_length (uri);
2880   if (len == -1)
2881     return NULL;
2882 
2883   lower_scheme = g_ascii_strdown (uri, len);
2884   scheme = g_intern_string (lower_scheme);
2885   g_free (lower_scheme);
2886 
2887   return scheme;
2888 }
2889 
/* Expands (per GLib's G_DEFINE_QUARK macro) to the definition of
 * g_uri_error_quark(), which returns the #GQuark for the string
 * "g-uri-quark". */
G_DEFINE_QUARK (g-uri-quark, g_uri_error)
2891