1 /* soup-uri-utils.c
2 *
3 * Copyright 2020 Igalia S.L.
4 * Copyright 1999-2003 Ximian, Inc.
5 *
6 * This file is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU Lesser General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
10 *
11 * This file is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * SPDX-License-Identifier: LGPL-2.0-or-later
20 */
21
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <string.h>
27 #include <stdlib.h>
28
29 #include <glib/gi18n-lib.h>
30
31 #include "soup-uri-utils-private.h"
32 #include "soup.h"
33 #include "soup-misc.h"
34
35 /**
36 * SECTION:soup-uri-utils
37 * @section_id: SoupURIUtils
38 * @title: URI Utilities
39 * @short_description: Functions to help working with #GUri and HTTP
40 *
41 * Utility functions and defines to help working with URIs.
42 */
43
44 /**
45 * SOUP_HTTP_URI_FLAGS:
46 *
47 * The set of #GUriFlags libsoup expects all #GUri to use.
48 */
49
/* Returns the well-known port for @scheme: 80 for "http"/"ws", 443 for
 * "https"/"wss" and 21 for "ftp". Any other scheme (including %NULL)
 * yields -1. The comparison is case-sensitive.
 */
static inline int
soup_scheme_default_port (const char *scheme)
{
        static const struct {
                const char *name;
                int         port;
        } known_ports[] = {
                { "http",  80  },
                { "ws",    80  },
                { "https", 443 },
                { "wss",   443 },
                { "ftp",   21  },
        };
        unsigned int i;

        for (i = 0; i < sizeof (known_ports) / sizeof (known_ports[0]); i++) {
                if (g_strcmp0 (scheme, known_ports[i].name) == 0)
                        return known_ports[i].port;
        }

        return -1;
}
64
65 static inline gboolean
parts_equal(const char * one,const char * two,gboolean insensitive)66 parts_equal (const char *one, const char *two, gboolean insensitive)
67 {
68 if (!one && !two)
69 return TRUE;
70 if (!one || !two)
71 return FALSE;
72 return insensitive ? !g_ascii_strcasecmp (one, two) : !strcmp (one, two);
73 }
74
75 static inline gboolean
path_equal(const char * one,const char * two)76 path_equal (const char *one, const char *two)
77 {
78 if (one[0] == '\0')
79 one = "/";
80 if (two[0] == '\0')
81 two = "/";
82
83 return !strcmp (one, two);
84 }
85
86 static gboolean
flags_equal(GUriFlags flags1,GUriFlags flags2)87 flags_equal (GUriFlags flags1, GUriFlags flags2)
88 {
89 /* We only care about flags that affect the contents which these do */
90 static const GUriFlags normalization_flags = (G_URI_FLAGS_ENCODED | G_URI_FLAGS_ENCODED_FRAGMENT |
91 G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY |
92 G_URI_FLAGS_SCHEME_NORMALIZE);
93
94 return (flags1 & normalization_flags) == (flags2 & normalization_flags);
95 }
96
97 /**
98 * soup_uri_equal:
99 * @uri1: a #GUri
100 * @uri2: another #GUri
101 *
102 * Tests whether or not @uri1 and @uri2 are equal in all parts
103 *
104 * Returns: %TRUE if equal otherwise %FALSE
105 **/
106 gboolean
soup_uri_equal(GUri * uri1,GUri * uri2)107 soup_uri_equal (GUri *uri1, GUri *uri2)
108 {
109 g_return_val_if_fail (uri1 != NULL, FALSE);
110 g_return_val_if_fail (uri2 != NULL, FALSE);
111
112 if (!flags_equal (g_uri_get_flags (uri1), g_uri_get_flags (uri2)) ||
113 g_strcmp0 (g_uri_get_scheme (uri1), g_uri_get_scheme (uri2)) ||
114 g_uri_get_port (uri1) != g_uri_get_port (uri2) ||
115 !parts_equal (g_uri_get_user (uri1), g_uri_get_user (uri2), FALSE) ||
116 !parts_equal (g_uri_get_password (uri1), g_uri_get_password (uri2), FALSE) ||
117 !parts_equal (g_uri_get_host (uri1), g_uri_get_host (uri2), TRUE) ||
118 !path_equal (g_uri_get_path (uri1), g_uri_get_path (uri2)) ||
119 !parts_equal (g_uri_get_query (uri1), g_uri_get_query (uri2), FALSE) ||
120 !parts_equal (g_uri_get_fragment (uri1), g_uri_get_fragment (uri2), FALSE)) {
121 return FALSE;
122 }
123
124 return TRUE;
125 }
126
127 /**
128 * soup_uri_get_path_and_query:
129 * @uri: a #GUri
130 *
131 * Extracts the `path` and `query` parts from @uri.
132 *
133 * Returns: string of combined path and query
134 **/
135 char *
soup_uri_get_path_and_query(GUri * uri)136 soup_uri_get_path_and_query (GUri *uri)
137 {
138 const char *query;
139
140 g_return_val_if_fail (uri != NULL, NULL);
141
142 query = g_uri_get_query (uri);
143
144 return g_strdup_printf ("%s%c%s", g_uri_get_path (uri),
145 query ? '?' : '\0',
146 query ? query : "");
147 }
148
149 /**
150 * soup_uri_uses_default_port:
151 * @uri: a #GUri
152 *
153 * Tests if @uri uses the default port for its scheme. (Eg, 80 for
154 * http.) (This only works for http, https and ftp; libsoup does not know
155 * the default ports of other protocols.)
156 *
157 * Returns: %TRUE or %FALSE
158 **/
159 gboolean
soup_uri_uses_default_port(GUri * uri)160 soup_uri_uses_default_port (GUri *uri)
161 {
162 g_return_val_if_fail (uri != NULL, FALSE);
163
164 if (g_uri_get_port (uri) == -1)
165 return TRUE;
166
167 if (g_uri_get_scheme (uri))
168 return g_uri_get_port (uri) == soup_scheme_default_port (g_uri_get_scheme (uri));
169
170 return FALSE;
171 }
172
/* Returns a new #GUri derived from @uri in which the user, password,
 * auth params, query and fragment are cleared and the path is replaced
 * by "/". The components that are not listed here (scheme, host and
 * port) are taken over from @uri by soup_uri_copy().
 *
 * Returns %NULL if @uri is %NULL.
 */
GUri *
soup_uri_copy_host (GUri *uri)
{
        g_return_val_if_fail (uri != NULL, NULL);

        return soup_uri_copy (uri,
                              SOUP_URI_USER, NULL,
                              SOUP_URI_PASSWORD, NULL,
                              SOUP_URI_AUTH_PARAMS, NULL,
                              SOUP_URI_PATH, "/",
                              SOUP_URI_QUERY, NULL,
                              SOUP_URI_FRAGMENT, NULL,
                              SOUP_URI_NONE);
}
187
188 /**
189 * soup_uri_host_hash:
190 * @key: (type GUri): a #GUri with a non-%NULL @host member
191 *
192 * Hashes @key, considering only the scheme, host, and port.
193 *
194 * Returns: A hash
195 */
196 guint
soup_uri_host_hash(gconstpointer key)197 soup_uri_host_hash (gconstpointer key)
198 {
199 GUri *uri = (GUri*)key;
200 const char *host;
201
202 g_return_val_if_fail (uri != NULL, 0);
203
204 host = g_uri_get_host (uri);
205
206 g_return_val_if_fail (host != NULL, 0);
207
208 return soup_str_case_hash (g_uri_get_scheme (uri)) +
209 g_uri_get_port (uri) +
210 soup_str_case_hash (host);
211 }
212
213 /**
214 * soup_uri_host_equal:
215 * @v1: (type GUri): a #GUri with a non-%NULL @host member
216 * @v2: (type GUri): a #GUri with a non-%NULL @host member
217 *
218 * Compares @v1 and @v2, considering only the scheme, host, and port.
219 *
220 * Returns: %TRUE if the URIs are equal in scheme, host, and port.
221 */
222 gboolean
soup_uri_host_equal(gconstpointer v1,gconstpointer v2)223 soup_uri_host_equal (gconstpointer v1, gconstpointer v2)
224 {
225 GUri *one = (GUri*)v1;
226 GUri *two = (GUri*)v2;
227 const char *one_host, *two_host;
228
229 g_return_val_if_fail (one != NULL && two != NULL, one == two);
230
231 one_host = g_uri_get_host (one);
232 two_host = g_uri_get_host (two);
233
234 g_return_val_if_fail (one_host != NULL && two_host != NULL, one_host == two_host);
235
236 if (one == two)
237 return TRUE;
238 if (g_strcmp0 (g_uri_get_scheme (one), g_uri_get_scheme (two)) != 0)
239 return FALSE;
240
241 if (g_uri_get_port (one) != g_uri_get_port (two))
242 return FALSE;
243
244 return g_ascii_strcasecmp (one_host, two_host) == 0;
245 }
246
247 gboolean
soup_uri_is_https(GUri * uri)248 soup_uri_is_https (GUri *uri)
249 {
250 const char *scheme;
251
252 g_assert (uri != NULL);
253
254 scheme = g_uri_get_scheme (uri);
255 if (G_UNLIKELY (scheme == NULL))
256 return FALSE;
257
258 return strcmp (scheme, "https") == 0 || strcmp (scheme, "wss") == 0;
259 }
260
261 gboolean
soup_uri_is_http(GUri * uri)262 soup_uri_is_http (GUri *uri)
263 {
264 const char *scheme;
265
266 g_assert (uri != NULL);
267
268 scheme = g_uri_get_scheme (uri);
269 if (G_UNLIKELY (scheme == NULL))
270 return FALSE;
271
272 return strcmp (scheme, "http") == 0 || strcmp (scheme, "ws") == 0;
273 }
274
275 #define BASE64_INDICATOR ";base64"
276 #define BASE64_INDICATOR_LEN (sizeof (";base64") - 1)
277
278 /**
279 * soup_uri_decode_data_uri:
280 * @uri: a data URI, in string form
281 * @content_type: (out) (nullable) (transfer full): location to store content type, or %NULL
282 *
283 * Decodes the given data URI and returns its contents and @content_type.
284 *
285 * Returns: (transfer full): a #GBytes with the contents of @uri,
286 * or %NULL if @uri is not a valid data URI
287 */
288 GBytes *
soup_uri_decode_data_uri(const char * uri,char ** content_type)289 soup_uri_decode_data_uri (const char *uri,
290 char **content_type)
291 {
292 GUri *soup_uri;
293 const char *comma, *start, *end;
294 gboolean base64 = FALSE;
295 char *uri_string;
296 GBytes *bytes;
297
298 g_return_val_if_fail (uri != NULL, NULL);
299
300 soup_uri = g_uri_parse (uri, SOUP_HTTP_URI_FLAGS, NULL);
301 if (!soup_uri)
302 return NULL;
303
304 if (g_strcmp0 (g_uri_get_scheme (soup_uri), "data") || g_uri_get_host (soup_uri) != NULL) {
305 g_uri_unref (soup_uri);
306 return NULL;
307 }
308
309 if (content_type)
310 *content_type = NULL;
311
312 uri_string = g_uri_to_string (soup_uri);
313 g_uri_unref (soup_uri);
314
315 start = uri_string + 5;
316 comma = strchr (start, ',');
317 if (comma && comma != start) {
318 /* Deal with MIME type / params */
319 if (comma >= start + BASE64_INDICATOR_LEN && !g_ascii_strncasecmp (comma - BASE64_INDICATOR_LEN, BASE64_INDICATOR, BASE64_INDICATOR_LEN)) {
320 end = comma - BASE64_INDICATOR_LEN;
321 base64 = TRUE;
322 } else
323 end = comma;
324
325 if (end != start && content_type)
326 *content_type = g_uri_unescape_segment (start, end, NULL);
327 }
328
329 if (content_type && !*content_type)
330 *content_type = g_strdup ("text/plain;charset=US-ASCII");
331
332 if (comma)
333 start = comma + 1;
334
335 if (*start) {
336 bytes = g_uri_unescape_bytes (start, -1, NULL, NULL);
337
338 if (base64 && bytes) {
339 if (g_bytes_get_size (bytes) <= 1)
340 g_clear_pointer (&bytes, g_bytes_unref);
341 else {
342 gsize content_length;
343 GByteArray *unescaped_array = g_bytes_unref_to_array (bytes);
344 g_base64_decode_inplace ((gchar*)unescaped_array->data, &content_length);
345 unescaped_array->len = content_length;
346 bytes = g_byte_array_free_to_bytes (unescaped_array);
347 }
348 }
349 } else {
350 bytes = g_bytes_new_static (NULL, 0);
351 }
352 g_free (uri_string);
353
354 return bytes;
355 }
356
357 /**
358 * SoupURIComponent:
359 * @SOUP_URI_NONE: no component
360 * @SOUP_URI_SCHEME: the URI scheme component
361 * @SOUP_URI_USER: the URI user component
362 * @SOUP_URI_PASSWORD: the URI password component
363 * @SOUP_URI_AUTH_PARAMS: the URI authentication parameters component
364 * @SOUP_URI_HOST: the URI host component
365 * @SOUP_URI_PORT: the URI port component
366 * @SOUP_URI_PATH: the URI path component
367 * @SOUP_URI_QUERY: the URI query component
368 * @SOUP_URI_FRAGMENT: the URI fragment component
369 *
370 * Enum values passed to soup_uri_copy() to indicate the components of
371 * the URI that should be updated with the given values.
372 */
373
374 /**
375 * soup_uri_copy: (skip)
376 * @uri: the #GUri to copy
377 * @first_component: first #SoupURIComponent to update
378 * @...: value of @first_component followed by additional
379 * components and values, terminated by %SOUP_URI_NONE
380 *
381 * Return a copy of @uri with the given components updated
382 *
383 * Returns: (transfer full): a new #GUri
384 */
385 GUri *
soup_uri_copy(GUri * uri,SoupURIComponent first_component,...)386 soup_uri_copy (GUri *uri,
387 SoupURIComponent first_component,
388 ...)
389 {
390 va_list args;
391 SoupURIComponent component = first_component;
392 gpointer values[SOUP_URI_FRAGMENT + 1];
393 gboolean values_to_set[SOUP_URI_FRAGMENT + 1];
394 GUriFlags flags = g_uri_get_flags (uri);
395
396 g_return_val_if_fail (uri != NULL, NULL);
397
398 memset (&values_to_set, 0, sizeof (values_to_set));
399
400 va_start (args, first_component);
401 while (component != SOUP_URI_NONE) {
402 if (component == SOUP_URI_PORT)
403 values[component] = GINT_TO_POINTER (va_arg (args, glong));
404 else
405 values[component] = va_arg (args, gpointer);
406 values_to_set[component] = TRUE;
407 component = va_arg (args, SoupURIComponent);
408 }
409 va_end (args);
410
411 if (values_to_set[SOUP_URI_PASSWORD])
412 flags |= G_URI_FLAGS_HAS_PASSWORD;
413 if (values_to_set[SOUP_URI_AUTH_PARAMS])
414 flags |= G_URI_FLAGS_HAS_AUTH_PARAMS;
415 if (values_to_set[SOUP_URI_PATH])
416 flags |= G_URI_FLAGS_ENCODED_PATH;
417 if (values_to_set[SOUP_URI_QUERY])
418 flags |= G_URI_FLAGS_ENCODED_QUERY;
419 if (values_to_set[SOUP_URI_FRAGMENT])
420 flags |= G_URI_FLAGS_ENCODED_FRAGMENT;
421 return g_uri_build_with_user (
422 flags,
423 values_to_set[SOUP_URI_SCHEME] ? values[SOUP_URI_SCHEME] : g_uri_get_scheme (uri),
424 values_to_set[SOUP_URI_USER] ? values[SOUP_URI_USER] : g_uri_get_user (uri),
425 values_to_set[SOUP_URI_PASSWORD] ? values[SOUP_URI_PASSWORD] : g_uri_get_password (uri),
426 values_to_set[SOUP_URI_AUTH_PARAMS] ? values[SOUP_URI_AUTH_PARAMS] : g_uri_get_auth_params (uri),
427 values_to_set[SOUP_URI_HOST] ? values[SOUP_URI_HOST] : g_uri_get_host (uri),
428 values_to_set[SOUP_URI_PORT] ? GPOINTER_TO_INT (values[SOUP_URI_PORT]) : g_uri_get_port (uri),
429 values_to_set[SOUP_URI_PATH] ? values[SOUP_URI_PATH] : g_uri_get_path (uri),
430 values_to_set[SOUP_URI_QUERY] ? values[SOUP_URI_QUERY] : g_uri_get_query (uri),
431 values_to_set[SOUP_URI_FRAGMENT] ? values[SOUP_URI_FRAGMENT] : g_uri_get_fragment (uri)
432 );
433 }
434
435 GUri *
soup_uri_copy_with_normalized_flags(GUri * uri)436 soup_uri_copy_with_normalized_flags (GUri *uri)
437 {
438 GUriFlags flags = g_uri_get_flags (uri);
439
440 /* We require its encoded (hostname encoding optional) */
441 if (((flags & (G_URI_FLAGS_ENCODED_PATH | G_URI_FLAGS_ENCODED_QUERY | G_URI_FLAGS_ENCODED_FRAGMENT)) ||
442 (flags & G_URI_FLAGS_ENCODED)) &&
443 /* And has scheme-based normalization */
444 (flags & G_URI_FLAGS_SCHEME_NORMALIZE))
445 return g_uri_ref (uri);
446
447 return g_uri_build_with_user (
448 g_uri_get_flags (uri) | SOUP_HTTP_URI_FLAGS,
449 g_uri_get_scheme (uri),
450 g_uri_get_user (uri),
451 g_uri_get_password (uri),
452 g_uri_get_auth_params (uri),
453 g_uri_get_host (uri),
454 g_uri_get_port (uri),
455 g_uri_get_path (uri),
456 g_uri_get_query (uri),
457 g_uri_get_fragment (uri)
458 );
459 }
460
461 char *
soup_uri_get_host_for_headers(GUri * uri)462 soup_uri_get_host_for_headers (GUri *uri)
463 {
464 const char *host = g_uri_get_host (uri);
465
466 if (strchr (host, ':'))
467 return g_strdup_printf ("[%.*s]", (int)strcspn (host, "%"), host);
468 if (g_hostname_is_non_ascii (host))
469 return g_hostname_to_ascii (host);
470
471 return g_strdup (host);
472 }
473