1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /*
3 * Copyright © 2000-2003 Marco Pesenti Gritti
4 * Copyright © 2003, 2004, 2005 Christian Persch
5 * Copyright © 2004 Crispin Flowerday
6 * Copyright © 2004 Adam Hooper
7 *
8 * This file is part of Epiphany.
9 *
10 * Epiphany is free software: you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation, either version 3 of the License, or
13 * (at your option) any later version.
14 *
15 * Epiphany is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with Epiphany. If not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "config.h"
25 #include "ephy-embed-utils.h"
26
27 #include "ephy-about-handler.h"
28 #include "ephy-prefs.h"
29 #include "ephy-reader-handler.h"
30 #include "ephy-settings.h"
31 #include "ephy-string.h"
32 #include "ephy-view-source-handler.h"
33
34 #include <glib/gi18n.h>
35 #include <jsc/jsc.h>
36 #include <libsoup/soup.h>
37 #include <string.h>
38
39 static GRegex *non_search_regex;
40 static GRegex *domain_regex;
41
42 char *
ephy_embed_utils_link_message_parse(const char * message)43 ephy_embed_utils_link_message_parse (const char *message)
44 {
45 char *status_message;
46 char **splitted_message;
47 int i = 1;
48 char *p;
49 GString *tmp;
50
51 status_message = ephy_string_blank_chr (g_strdup (message));
52
53 if (!status_message || !g_str_has_prefix (status_message, "mailto:"))
54 return status_message;
55
56 /* We first want to eliminate all the things after "?", like cc,
57 * subject and alike.
58 */
59 p = strchr (status_message, '?');
60 if (p != NULL) *p = '\0';
61
62 /* Then we also want to check if there is more than an email address
63 * in the mailto: list.
64 */
65 splitted_message = g_strsplit_set (status_message, ";", -1);
66 tmp = g_string_new (g_strdup_printf (_("Send an email message to “%s”"),
67 (splitted_message[0] + 7)));
68
69 while (splitted_message [i] != NULL) {
70 g_string_append_printf (tmp, ", “%s”", splitted_message[i]);
71 i++;
72 }
73
74 g_free (status_message);
75 g_strfreev (splitted_message);
76
77 return g_string_free (tmp, FALSE);
78 }
79
80 static gpointer
create_non_search_regex(gpointer user_data)81 create_non_search_regex (gpointer user_data)
82 {
83 non_search_regex = g_regex_new (EPHY_WEB_VIEW_NON_SEARCH_REGEX,
84 G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY, NULL);
85 return non_search_regex;
86 }
87
88 static GRegex *
get_non_search_regex(void)89 get_non_search_regex (void)
90 {
91 static GOnce once_init = G_ONCE_INIT;
92
93 return g_once (&once_init, create_non_search_regex, NULL);
94 }
95
96 static gpointer
create_domain_regex(gpointer user_data)97 create_domain_regex (gpointer user_data)
98 {
99 domain_regex = g_regex_new (EPHY_WEB_VIEW_DOMAIN_REGEX,
100 G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY, NULL);
101 return domain_regex;
102 }
103
104 static GRegex *
get_domain_regex(void)105 get_domain_regex (void)
106 {
107 static GOnce once_init = G_ONCE_INIT;
108
109 return g_once (&once_init, create_domain_regex, NULL);
110 }
111
112 gboolean
ephy_embed_utils_address_has_web_scheme(const char * address)113 ephy_embed_utils_address_has_web_scheme (const char *address)
114 {
115 gboolean has_web_scheme;
116 int colonpos;
117
118 if (address == NULL)
119 return FALSE;
120
121 colonpos = (int)((strstr (address, ":")) - address);
122
123 if (colonpos < 0)
124 return FALSE;
125
126 has_web_scheme = !(g_ascii_strncasecmp (address, "http", colonpos) &&
127 g_ascii_strncasecmp (address, "https", colonpos) &&
128 g_ascii_strncasecmp (address, "file", colonpos) &&
129 g_ascii_strncasecmp (address, "javascript", colonpos) &&
130 g_ascii_strncasecmp (address, "data", colonpos) &&
131 g_ascii_strncasecmp (address, "blob", colonpos) &&
132 g_ascii_strncasecmp (address, "about", colonpos) &&
133 g_ascii_strncasecmp (address, "ephy-about", colonpos) &&
134 g_ascii_strncasecmp (address, "ephy-resource", colonpos) &&
135 g_ascii_strncasecmp (address, "ephy-source", colonpos) &&
136 g_ascii_strncasecmp (address, "ephy-reader", colonpos) &&
137 g_ascii_strncasecmp (address, "ephy-pdf", colonpos) &&
138 g_ascii_strncasecmp (address, "gopher", colonpos) &&
139 g_ascii_strncasecmp (address, "inspector", colonpos) &&
140 g_ascii_strncasecmp (address, "webkit", colonpos));
141
142 return has_web_scheme;
143 }
144
145 gboolean
ephy_embed_utils_address_is_existing_absolute_filename(const char * address)146 ephy_embed_utils_address_is_existing_absolute_filename (const char *address)
147 {
148 g_autofree char *real_address = NULL;
149
150 if (!strchr (address, '#')) {
151 real_address = g_strdup (address);
152 } else {
153 gint pos;
154
155 pos = g_strstr_len (address, -1, "#") - address;
156 real_address = g_strndup (address, pos);
157 }
158
159 return g_path_is_absolute (real_address) &&
160 g_file_test (real_address, G_FILE_TEST_EXISTS);
161 }
162
163 static gboolean
is_public_domain(const char * address)164 is_public_domain (const char *address)
165 {
166 char *host;
167 gboolean retval = FALSE;
168
169 host = ephy_string_get_host_name (address);
170 if (!host)
171 return FALSE;
172
173 if (g_regex_match (get_domain_regex (), host, 0, NULL)) {
174 if (!strcmp (host, "localhost"))
175 retval = TRUE;
176 else {
177 const char *end;
178
179 end = g_strrstr (host, ".");
180 if (end && *end != '\0')
181 retval = soup_tld_domain_is_public_suffix (end);
182 }
183 }
184
185 g_free (host);
186
187 return retval;
188 }
189
190 static gboolean
is_bang_search(const char * address)191 is_bang_search (const char *address)
192 {
193 EphyEmbedShell *shell;
194 EphySearchEngineManager *search_engine_manager;
195 char **bangs;
196 GString *buffer;
197
198 shell = ephy_embed_shell_get_default ();
199 search_engine_manager = ephy_embed_shell_get_search_engine_manager (shell);
200 bangs = ephy_search_engine_manager_get_bangs (search_engine_manager);
201
202 for (uint i = 0; bangs[i] != NULL; i++) {
203 buffer = g_string_new (bangs[i]);
204 g_string_append (buffer, " ");
205
206 if (strstr (address, buffer->str) == address) {
207 g_string_free (buffer, TRUE);
208 g_free (bangs);
209 return TRUE;
210 }
211 g_string_free (buffer, TRUE);
212 }
213 g_free (bangs);
214
215 return FALSE;
216 }
217
218 static gboolean
is_host_with_port(const char * address)219 is_host_with_port (const char *address)
220 {
221 g_auto (GStrv) split = NULL;
222 gint64 port = 0;
223
224 if (strchr (address, ' '))
225 return FALSE;
226
227 split = g_strsplit (address, ":", -1);
228 if (g_strv_length (split) == 2)
229 port = g_ascii_strtoll (split[1], NULL, 10);
230
231 return port != 0;
232 }
233
234 gboolean
ephy_embed_utils_address_is_valid(const char * address)235 ephy_embed_utils_address_is_valid (const char *address)
236 {
237 char *scheme;
238 gboolean retval;
239 GAppInfo *info = NULL;
240
241 if (!address)
242 return FALSE;
243
244 scheme = g_uri_parse_scheme (address);
245
246 if (scheme != NULL) {
247 info = g_app_info_get_default_for_uri_scheme (scheme);
248 g_free (scheme);
249 }
250
251 retval = info ||
252 ephy_embed_utils_address_is_existing_absolute_filename (address) ||
253 g_regex_match (get_non_search_regex (), address, 0, NULL) ||
254 is_public_domain (address) ||
255 is_bang_search (address) ||
256 is_host_with_port (address);
257
258 g_clear_object (&info);
259
260 return retval;
261 }
262
263 static char *
ensure_host_name_is_lowercase(const char * address)264 ensure_host_name_is_lowercase (const char *address)
265 {
266 g_autofree gchar *host = ephy_string_get_host_name (address);
267 g_autofree gchar *lowercase_host = NULL;
268
269 if (!host)
270 return g_strdup (address);
271
272 lowercase_host = g_utf8_strdown (host, -1);
273
274 if (strcmp (host, lowercase_host) != 0)
275 return ephy_string_find_and_replace (address, host, lowercase_host);
276 else
277 return g_strdup (address);
278 }
279
280 char *
ephy_embed_utils_normalize_address(const char * input_address)281 ephy_embed_utils_normalize_address (const char *input_address)
282 {
283 char *effective_address = NULL;
284 g_autofree gchar *address = NULL;
285
286 g_assert (input_address);
287 /* We don't want to lowercase the host name if it's a bang search, as it's not a URI.
288 * It would otherwise lowercase the entire search string, bang included, which is not
289 * what we want. So use input_address directly.
290 */
291 if (is_bang_search (input_address)) {
292 EphyEmbedShell *shell;
293 EphySearchEngineManager *search_engine_manager;
294
295 shell = ephy_embed_shell_get_default ();
296 search_engine_manager = ephy_embed_shell_get_search_engine_manager (shell);
297 return ephy_search_engine_manager_parse_bang_search (search_engine_manager,
298 input_address);
299 }
300
301 address = ensure_host_name_is_lowercase (input_address);
302
303 if (ephy_embed_utils_address_is_existing_absolute_filename (address))
304 return g_strconcat ("file://", address, NULL);
305
306 if (strcmp (address, "about:gpu") == 0)
307 return g_strdup ("webkit://gpu");
308
309 if (g_str_has_prefix (address, "about:") && strcmp (address, "about:blank"))
310 return g_strconcat (EPHY_ABOUT_SCHEME, address + strlen ("about"), NULL);
311
312 if (!ephy_embed_utils_address_has_web_scheme (address)) {
313 const char *scheme;
314
315 scheme = g_uri_peek_scheme (address);
316
317 /* Auto-prepend http:// to anything that is not
318 * one according to GLib, because it probably will be
319 * something like "google.com". Special case localhost(:port)
320 * and IP(:port), because GUri, correctly, thinks it is a
321 * URI with scheme being localhost/IP and, optionally, path
322 * being the port. Ideally we should check if we have a
323 * handler for the scheme, and since we'll fail for localhost
324 * and IP, we'd fallback to loading it as a domain. */
325 if (!scheme ||
326 !g_strcmp0 (scheme, "localhost") ||
327 g_hostname_is_ip_address (scheme) ||
328 is_host_with_port (address))
329 effective_address = g_strconcat ("http://", address, NULL);
330 }
331
332 return effective_address ? effective_address : g_strdup (address);
333 }
334
335 char *
ephy_embed_utils_autosearch_address(const char * search_key)336 ephy_embed_utils_autosearch_address (const char *search_key)
337 {
338 char *query_param;
339 const char *address_search;
340 char *effective_address;
341 EphyEmbedShell *shell;
342 EphySearchEngineManager *search_engine_manager;
343
344 if (!g_settings_get_boolean (EPHY_SETTINGS_WEB, EPHY_PREFS_WEB_ENABLE_AUTOSEARCH))
345 return g_strdup (search_key);
346
347 shell = ephy_embed_shell_get_default ();
348 search_engine_manager = ephy_embed_shell_get_search_engine_manager (shell);
349 address_search = ephy_search_engine_manager_get_default_search_address (search_engine_manager);
350
351 query_param = soup_form_encode ("q", search_key, NULL);
352 #pragma GCC diagnostic push
353 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
354 /* Format string under control of user input... but gsettings is trusted input. */
355 /* + 2 here is getting rid of 'q=' */
356 effective_address = g_strdup_printf (address_search, query_param + 2);
357 #pragma GCC diagnostic pop
358 g_free (query_param);
359
360 return effective_address;
361 }
362
/**
 * ephy_embed_utils_normalize_or_autosearch_address:
 * @address: the text entered by the user
 *
 * Normalizes @address when ephy_embed_utils_address_is_valid() accepts it;
 * otherwise turns it into an autosearch address.
 *
 * Returns: (transfer full): the address to load.
 */
char *
ephy_embed_utils_normalize_or_autosearch_address (const char *address)
{
  if (!ephy_embed_utils_address_is_valid (address))
    return ephy_embed_utils_autosearch_address (address);

  return ephy_embed_utils_normalize_address (address);
}
371
372 gboolean
ephy_embed_utils_url_is_empty(const char * location)373 ephy_embed_utils_url_is_empty (const char *location)
374 {
375 return (location == NULL ||
376 location[0] == '\0' ||
377 strcmp (location, "about:blank") == 0 ||
378 strcmp (location, "ephy-about:overview") == 0 ||
379 strcmp (location, "ephy-about:incognito") == 0);
380 }
381
/* This is the list of addresses that should never be shown in the
 * window's location entry. The table is NULL-terminated, and both the
 * strings and the pointer slots are constant so it cannot be modified
 * at run time.
 */
static const char * const do_not_show_address[] = {
  "about:blank",
  "ephy-about:incognito",
  "ephy-about:overview",
  NULL
};
390
391 gboolean
ephy_embed_utils_is_no_show_address(const char * address)392 ephy_embed_utils_is_no_show_address (const char *address)
393 {
394 int i;
395
396 if (!address)
397 return FALSE;
398
399 for (i = 0; do_not_show_address[i]; i++)
400 if (!strcmp (address, do_not_show_address[i]))
401 return TRUE;
402
403 if (g_str_has_prefix (address, EPHY_VIEW_SOURCE_SCHEME))
404 return TRUE;
405
406 return FALSE;
407 }
408
409 char *
ephy_embed_utils_get_title_from_address(const char * address)410 ephy_embed_utils_get_title_from_address (const char *address)
411 {
412 if (g_str_has_prefix (address, "file://"))
413 return g_strdup (address + 7);
414
415 if (!strcmp (address, EPHY_ABOUT_SCHEME ":overview") ||
416 !strcmp (address, "about:overview"))
417 return g_strdup (_(OVERVIEW_PAGE_TITLE));
418
419 return ephy_string_get_host_name (address);
420 }
421
422 void
ephy_embed_utils_shutdown(void)423 ephy_embed_utils_shutdown (void)
424 {
425 g_clear_pointer (&non_search_regex, g_regex_unref);
426 g_clear_pointer (&domain_regex, g_regex_unref);
427 }
428