1 /* soup-uri-utils.c
2  *
3  * Copyright 2020 Igalia S.L.
4  * Copyright 1999-2003 Ximian, Inc.
5  *
6  * This file is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU Lesser General Public License as
8  * published by the Free Software Foundation; either version 2 of the
9  * License, or (at your option) any later version.
10  *
11  * This file is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * SPDX-License-Identifier: LGPL-2.0-or-later
20  */
21 
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 
26 #include <string.h>
27 #include <stdlib.h>
28 
29 #include <glib/gi18n-lib.h>
30 
31 #include "soup-uri-utils-private.h"
32 #include "soup.h"
33 #include "soup-misc.h"
34 
35 /**
36  * SECTION:soup-uri-utils
37  * @section_id: SoupURIUtils
38  * @title: URI Utilities
39  * @short_description: Functions to help working with #GUri and HTTP
40  *
41  * Utility functions and defines to help working with URIs.
42  */
43 
44 /**
45  * SOUP_HTTP_URI_FLAGS:
46  *
47  * The set of #GUriFlags libsoup expects all #GUri to use.
48  */
49 
50 static inline int
soup_scheme_default_port(const char * scheme)51 soup_scheme_default_port (const char *scheme)
52 {
53         if (!g_strcmp0 (scheme, "http") ||
54             !g_strcmp0 (scheme, "ws"))
55 		return 80;
56 	else if (!g_strcmp0 (scheme, "https") ||
57                  !g_strcmp0 (scheme, "wss"))
58 		return 443;
59 	else if (!g_strcmp0 (scheme, "ftp"))
60 		return 21;
61 	else
62 		return -1;
63 }
64 
65 static inline gboolean
parts_equal(const char * one,const char * two,gboolean insensitive)66 parts_equal (const char *one, const char *two, gboolean insensitive)
67 {
68 	if (!one && !two)
69 		return TRUE;
70 	if (!one || !two)
71 		return FALSE;
72 	return insensitive ? !g_ascii_strcasecmp (one, two) : !strcmp (one, two);
73 }
74 
75 static inline gboolean
path_equal(const char * one,const char * two)76 path_equal (const char *one, const char *two)
77 {
78         if (one[0] == '\0')
79                 one = "/";
80         if (two[0] == '\0')
81                 two = "/";
82 
83 	return !strcmp (one, two);
84 }
85 
86 static gboolean
flags_equal(GUriFlags flags1,GUriFlags flags2)87 flags_equal (GUriFlags flags1, GUriFlags flags2)
88 {
89         /* We only care about flags that affect the contents which these do */
90         static const GUriFlags normalization_flags = (G_URI_FLAGS_ENCODED | G_URI_FLAGS_ENCODED_FRAGMENT |
91                                                       G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY |
92                                                       G_URI_FLAGS_SCHEME_NORMALIZE);
93 
94         return (flags1 & normalization_flags) == (flags2 & normalization_flags);
95 }
96 
97 /**
98  * soup_uri_equal:
99  * @uri1: a #GUri
100  * @uri2: another #GUri
101  *
102  * Tests whether or not @uri1 and @uri2 are equal in all parts
103  *
104  * Returns: %TRUE if equal otherwise %FALSE
105  **/
106 gboolean
soup_uri_equal(GUri * uri1,GUri * uri2)107 soup_uri_equal (GUri *uri1, GUri *uri2)
108 {
109      	g_return_val_if_fail (uri1 != NULL, FALSE);
110 	g_return_val_if_fail (uri2 != NULL, FALSE);
111 
112        	if (!flags_equal (g_uri_get_flags (uri1), g_uri_get_flags (uri2))                  ||
113             g_strcmp0 (g_uri_get_scheme (uri1), g_uri_get_scheme (uri2))                   ||
114 	    g_uri_get_port (uri1) != g_uri_get_port (uri2)                                 ||
115 	    !parts_equal (g_uri_get_user (uri1), g_uri_get_user (uri2), FALSE)             ||
116 	    !parts_equal (g_uri_get_password (uri1), g_uri_get_password (uri2), FALSE)     ||
117 	    !parts_equal (g_uri_get_host (uri1), g_uri_get_host (uri2), TRUE)              ||
118 	    !path_equal (g_uri_get_path (uri1), g_uri_get_path (uri2))                     ||
119 	    !parts_equal (g_uri_get_query (uri1), g_uri_get_query (uri2), FALSE)           ||
120 	    !parts_equal (g_uri_get_fragment (uri1), g_uri_get_fragment (uri2), FALSE)) {
121                 return FALSE;
122             }
123 
124         return TRUE;
125 }
126 
127 /**
128  * soup_uri_get_path_and_query:
129  * @uri: a #GUri
130  *
131  * Extracts the `path` and `query` parts from @uri.
132  *
133  * Returns: string of combined path and query
134  **/
135 char *
soup_uri_get_path_and_query(GUri * uri)136 soup_uri_get_path_and_query (GUri *uri)
137 {
138         const char *query;
139 
140 	g_return_val_if_fail (uri != NULL, NULL);
141 
142         query = g_uri_get_query (uri);
143 
144         return g_strdup_printf ("%s%c%s", g_uri_get_path (uri),
145                                 query ? '?' : '\0',
146                                 query ? query : "");
147 }
148 
149 /**
150  * soup_uri_uses_default_port:
151  * @uri: a #GUri
152  *
153  * Tests if @uri uses the default port for its scheme. (Eg, 80 for
154  * http.) (This only works for http, https and ftp; libsoup does not know
155  * the default ports of other protocols.)
156  *
157  * Returns: %TRUE or %FALSE
158  **/
159 gboolean
soup_uri_uses_default_port(GUri * uri)160 soup_uri_uses_default_port (GUri *uri)
161 {
162         g_return_val_if_fail (uri != NULL, FALSE);
163 
164         if (g_uri_get_port (uri) == -1)
165                 return TRUE;
166 
167         if (g_uri_get_scheme (uri))
168                 return g_uri_get_port (uri) == soup_scheme_default_port (g_uri_get_scheme (uri));
169 
170         return FALSE;
171 }
172 
173 GUri *
soup_uri_copy_host(GUri * uri)174 soup_uri_copy_host (GUri *uri)
175 {
176         g_return_val_if_fail (uri != NULL, NULL);
177 
178         return soup_uri_copy (uri,
179                               SOUP_URI_USER, NULL,
180                               SOUP_URI_PASSWORD, NULL,
181                               SOUP_URI_AUTH_PARAMS, NULL,
182                               SOUP_URI_PATH, "/",
183                               SOUP_URI_QUERY, NULL,
184                               SOUP_URI_FRAGMENT, NULL,
185                               SOUP_URI_NONE);
186 }
187 
188 /**
189  * soup_uri_host_hash:
190  * @key: (type GUri): a #GUri with a non-%NULL @host member
191  *
192  * Hashes @key, considering only the scheme, host, and port.
193  *
194  * Returns: A hash
195  */
196 guint
soup_uri_host_hash(gconstpointer key)197 soup_uri_host_hash (gconstpointer key)
198 {
199 	GUri *uri = (GUri*)key;
200         const char *host;
201 
202 	g_return_val_if_fail (uri != NULL, 0);
203 
204         host = g_uri_get_host (uri);
205 
206 	g_return_val_if_fail (host != NULL, 0);
207 
208 	return soup_str_case_hash (g_uri_get_scheme (uri)) +
209                g_uri_get_port (uri) +
210 	       soup_str_case_hash (host);
211 }
212 
213 /**
214  * soup_uri_host_equal:
215  * @v1: (type GUri): a #GUri with a non-%NULL @host member
216  * @v2: (type GUri): a #GUri with a non-%NULL @host member
217  *
218  * Compares @v1 and @v2, considering only the scheme, host, and port.
219  *
220  * Returns: %TRUE if the URIs are equal in scheme, host, and port.
221  */
222 gboolean
soup_uri_host_equal(gconstpointer v1,gconstpointer v2)223 soup_uri_host_equal (gconstpointer v1, gconstpointer v2)
224 {
225 	GUri *one = (GUri*)v1;
226 	GUri *two = (GUri*)v2;
227         const char *one_host, *two_host;
228 
229 	g_return_val_if_fail (one != NULL && two != NULL, one == two);
230 
231         one_host = g_uri_get_host (one);
232         two_host = g_uri_get_host (two);
233 
234 	g_return_val_if_fail (one_host != NULL && two_host != NULL, one_host == two_host);
235 
236         if (one == two)
237                 return TRUE;
238 	if (g_strcmp0 (g_uri_get_scheme (one), g_uri_get_scheme (two)) != 0)
239 		return FALSE;
240 
241 	if (g_uri_get_port (one) != g_uri_get_port (two))
242 		return FALSE;
243 
244 	return g_ascii_strcasecmp (one_host, two_host) == 0;
245 }
246 
247 gboolean
soup_uri_is_https(GUri * uri)248 soup_uri_is_https (GUri *uri)
249 {
250         const char *scheme;
251 
252         g_assert (uri != NULL);
253 
254         scheme = g_uri_get_scheme (uri);
255         if (G_UNLIKELY (scheme == NULL))
256                 return FALSE;
257 
258         return strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0;
259 }
260 
261 gboolean
soup_uri_is_http(GUri * uri)262 soup_uri_is_http (GUri *uri)
263 {
264         const char *scheme;
265 
266         g_assert (uri != NULL);
267 
268         scheme = g_uri_get_scheme (uri);
269         if (G_UNLIKELY (scheme == NULL))
270                 return FALSE;
271 
272         return strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0;
273 }
274 
275 #define BASE64_INDICATOR     ";base64"
276 #define BASE64_INDICATOR_LEN (sizeof (";base64") - 1)
277 
278 /**
279  * soup_uri_decode_data_uri:
280  * @uri: a data URI, in string form
281  * @content_type: (out) (nullable) (transfer full): location to store content type, or %NULL
282  *
283  * Decodes the given data URI and returns its contents and @content_type.
284  *
285  * Returns: (transfer full): a #GBytes with the contents of @uri,
286  *    or %NULL if @uri is not a valid data URI
287  */
288 GBytes *
soup_uri_decode_data_uri(const char * uri,char ** content_type)289 soup_uri_decode_data_uri (const char *uri,
290                           char      **content_type)
291 {
292         GUri *soup_uri;
293         const char *comma, *start, *end;
294         gboolean base64 = FALSE;
295         char *uri_string;
296         GBytes *bytes;
297 
298         g_return_val_if_fail (uri != NULL, NULL);
299 
300         soup_uri = g_uri_parse (uri, SOUP_HTTP_URI_FLAGS, NULL);
301         if (!soup_uri)
302                 return NULL;
303 
304         if (g_strcmp0 (g_uri_get_scheme (soup_uri), "data") || g_uri_get_host (soup_uri) != NULL) {
305                 g_uri_unref (soup_uri);
306                 return NULL;
307         }
308 
309         if (content_type)
310                 *content_type = NULL;
311 
312         uri_string = g_uri_to_string (soup_uri);
313         g_uri_unref (soup_uri);
314 
315         start = uri_string + 5;
316         comma = strchr (start, ',');
317         if (comma && comma != start) {
318                 /* Deal with MIME type / params */
319                 if (comma >= start + BASE64_INDICATOR_LEN && !g_ascii_strncasecmp (comma - BASE64_INDICATOR_LEN, BASE64_INDICATOR, BASE64_INDICATOR_LEN)) {
320                         end = comma - BASE64_INDICATOR_LEN;
321                         base64 = TRUE;
322                 } else
323                         end = comma;
324 
325                 if (end != start && content_type)
326                         *content_type = g_uri_unescape_segment (start, end, NULL);
327         }
328 
329         if (content_type && !*content_type)
330                 *content_type = g_strdup ("text/plain;charset=US-ASCII");
331 
332         if (comma)
333                 start = comma + 1;
334 
335         if (*start) {
336                 bytes = g_uri_unescape_bytes (start, -1, NULL, NULL);
337 
338                 if (base64 && bytes) {
339                         if (g_bytes_get_size (bytes) <= 1)
340                                 g_clear_pointer (&bytes, g_bytes_unref);
341                         else {
342                                 gsize content_length;
343                                 GByteArray *unescaped_array = g_bytes_unref_to_array (bytes);
344                                 g_base64_decode_inplace ((gchar*)unescaped_array->data, &content_length);
345                                 unescaped_array->len = content_length;
346                                 bytes = g_byte_array_free_to_bytes (unescaped_array);
347                         }
348                 }
349         } else {
350                 bytes = g_bytes_new_static (NULL, 0);
351         }
352         g_free (uri_string);
353 
354         return bytes;
355 }
356 
357 /**
358  * SoupURIComponent:
359  * @SOUP_URI_NONE: no component
360  * @SOUP_URI_SCHEME: the URI scheme component
361  * @SOUP_URI_USER: the URI user component
362  * @SOUP_URI_PASSWORD: the URI password component
363  * @SOUP_URI_AUTH_PARAMS: the URI authentication parameters component
364  * @SOUP_URI_HOST: the URI host component
365  * @SOUP_URI_PORT: the URI port component
366  * @SOUP_URI_PATH: the URI path component
367  * @SOUP_URI_QUERY: the URI query component
368  * @SOUP_URI_FRAGMENT: the URI fragment component
369  *
370  * Enum values passed to soup_uri_copy() to indicate the components of
371  * the URI that should be updated with the given values.
372  */
373 
374 /**
375  * soup_uri_copy: (skip)
376  * @uri: the #GUri to copy
377  * @first_component: first #SoupURIComponent to update
378  * @...: value of @first_component  followed by additional
379  *    components and values, terminated by %SOUP_URI_NONE
380  *
381  * Return a copy of @uri with the given components updated
382  *
383  * Returns: (transfer full): a new #GUri
384  */
385 GUri *
soup_uri_copy(GUri * uri,SoupURIComponent first_component,...)386 soup_uri_copy (GUri            *uri,
387                SoupURIComponent first_component,
388                ...)
389 {
390         va_list args;
391         SoupURIComponent component = first_component;
392         gpointer values[SOUP_URI_FRAGMENT + 1];
393         gboolean values_to_set[SOUP_URI_FRAGMENT + 1];
394         GUriFlags flags = g_uri_get_flags (uri);
395 
396         g_return_val_if_fail (uri != NULL, NULL);
397 
398         memset (&values_to_set, 0, sizeof (values_to_set));
399 
400         va_start (args, first_component);
401         while (component != SOUP_URI_NONE) {
402                 if (component == SOUP_URI_PORT)
403                         values[component] = GINT_TO_POINTER (va_arg (args, glong));
404                 else
405                         values[component] = va_arg (args, gpointer);
406                 values_to_set[component] = TRUE;
407                 component = va_arg (args, SoupURIComponent);
408         }
409         va_end (args);
410 
411         if (values_to_set[SOUP_URI_PASSWORD])
412                 flags |= G_URI_FLAGS_HAS_PASSWORD;
413         if (values_to_set[SOUP_URI_AUTH_PARAMS])
414                 flags |= G_URI_FLAGS_HAS_AUTH_PARAMS;
415         if (values_to_set[SOUP_URI_PATH])
416                 flags |= G_URI_FLAGS_ENCODED_PATH;
417         if (values_to_set[SOUP_URI_QUERY])
418                 flags |= G_URI_FLAGS_ENCODED_QUERY;
419         if (values_to_set[SOUP_URI_FRAGMENT])
420                 flags |= G_URI_FLAGS_ENCODED_FRAGMENT;
421         return g_uri_build_with_user (
422                 flags,
423                 values_to_set[SOUP_URI_SCHEME] ? values[SOUP_URI_SCHEME] : g_uri_get_scheme (uri),
424                 values_to_set[SOUP_URI_USER] ? values[SOUP_URI_USER] : g_uri_get_user (uri),
425                 values_to_set[SOUP_URI_PASSWORD] ? values[SOUP_URI_PASSWORD] : g_uri_get_password (uri),
426                 values_to_set[SOUP_URI_AUTH_PARAMS] ? values[SOUP_URI_AUTH_PARAMS] : g_uri_get_auth_params (uri),
427                 values_to_set[SOUP_URI_HOST] ? values[SOUP_URI_HOST] : g_uri_get_host (uri),
428                 values_to_set[SOUP_URI_PORT] ? GPOINTER_TO_INT (values[SOUP_URI_PORT]) : g_uri_get_port (uri),
429                 values_to_set[SOUP_URI_PATH] ? values[SOUP_URI_PATH] : g_uri_get_path (uri),
430                 values_to_set[SOUP_URI_QUERY] ? values[SOUP_URI_QUERY] : g_uri_get_query (uri),
431                 values_to_set[SOUP_URI_FRAGMENT] ? values[SOUP_URI_FRAGMENT] : g_uri_get_fragment (uri)
432         );
433 }
434 
435 GUri *
soup_uri_copy_with_normalized_flags(GUri * uri)436 soup_uri_copy_with_normalized_flags (GUri *uri)
437 {
438         GUriFlags flags = g_uri_get_flags (uri);
439 
440         /* We require its encoded (hostname encoding optional) */
441         if (((flags & (G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY | G_URI_FLAGS_ENCODED_FRAGMENT)) ||
442              (flags & G_URI_FLAGS_ENCODED)) &&
443             /* And has scheme-based normalization */
444             (flags & G_URI_FLAGS_SCHEME_NORMALIZE))
445                 return g_uri_ref (uri);
446 
447         return g_uri_build_with_user (
448                 g_uri_get_flags (uri) | SOUP_HTTP_URI_FLAGS,
449                 g_uri_get_scheme (uri),
450                 g_uri_get_user (uri),
451                 g_uri_get_password (uri),
452                 g_uri_get_auth_params (uri),
453                 g_uri_get_host (uri),
454                 g_uri_get_port (uri),
455                 g_uri_get_path (uri),
456                 g_uri_get_query (uri),
457                 g_uri_get_fragment (uri)
458         );
459 }
460 
461 char *
soup_uri_get_host_for_headers(GUri * uri)462 soup_uri_get_host_for_headers (GUri *uri)
463 {
464         const char *host = g_uri_get_host (uri);
465 
466         if (strchr (host, ':'))
467                 return g_strdup_printf ("[%.*s]", (int)strcspn (host, "%"), host);
468         if (g_hostname_is_non_ascii (host))
469                 return g_hostname_to_ascii (host);
470 
471         return g_strdup (host);
472 }
473