1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /*
3  *  Copyright © 2000-2003 Marco Pesenti Gritti
4  *  Copyright © 2003, 2004, 2005 Christian Persch
5  *  Copyright © 2004 Crispin Flowerday
6  *  Copyright © 2004 Adam Hooper
7  *
8  *  This file is part of Epiphany.
9  *
10  *  Epiphany is free software: you can redistribute it and/or modify
11  *  it under the terms of the GNU General Public License as published by
12  *  the Free Software Foundation, either version 3 of the License, or
13  *  (at your option) any later version.
14  *
15  *  Epiphany is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *  GNU General Public License for more details.
19  *
20  *  You should have received a copy of the GNU General Public License
21  *  along with Epiphany.  If not, see <http://www.gnu.org/licenses/>.
22  */
23 
24 #include "config.h"
25 #include "ephy-embed-utils.h"
26 
27 #include "ephy-about-handler.h"
28 #include "ephy-prefs.h"
29 #include "ephy-reader-handler.h"
30 #include "ephy-settings.h"
31 #include "ephy-string.h"
32 #include "ephy-view-source-handler.h"
33 
34 #include <glib/gi18n.h>
35 #include <jsc/jsc.h>
36 #include <libsoup/soup.h>
37 #include <string.h>
38 
39 static GRegex *non_search_regex;
40 static GRegex *domain_regex;
41 
42 char *
ephy_embed_utils_link_message_parse(const char * message)43 ephy_embed_utils_link_message_parse (const char *message)
44 {
45   char *status_message;
46   char **splitted_message;
47   int i = 1;
48   char *p;
49   GString *tmp;
50 
51   status_message = ephy_string_blank_chr (g_strdup (message));
52 
53   if (!status_message || !g_str_has_prefix (status_message, "mailto:"))
54     return status_message;
55 
56   /* We first want to eliminate all the things after "?", like cc,
57    * subject and alike.
58    */
59   p = strchr (status_message, '?');
60   if (p != NULL) *p = '\0';
61 
62   /* Then we also want to check if there is more than an email address
63    * in the mailto: list.
64    */
65   splitted_message = g_strsplit_set (status_message, ";", -1);
66   tmp = g_string_new (g_strdup_printf (_("Send an email message to “%s”"),
67                                        (splitted_message[0] + 7)));
68 
69   while (splitted_message [i] != NULL) {
70     g_string_append_printf (tmp, ", “%s”", splitted_message[i]);
71     i++;
72   }
73 
74   g_free (status_message);
75   g_strfreev (splitted_message);
76 
77   return g_string_free (tmp, FALSE);
78 }
79 
80 static gpointer
create_non_search_regex(gpointer user_data)81 create_non_search_regex (gpointer user_data)
82 {
83   non_search_regex = g_regex_new (EPHY_WEB_VIEW_NON_SEARCH_REGEX,
84                                   G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY, NULL);
85   return non_search_regex;
86 }
87 
88 static GRegex *
get_non_search_regex(void)89 get_non_search_regex (void)
90 {
91   static GOnce once_init = G_ONCE_INIT;
92 
93   return g_once (&once_init, create_non_search_regex, NULL);
94 }
95 
96 static gpointer
create_domain_regex(gpointer user_data)97 create_domain_regex (gpointer user_data)
98 {
99   domain_regex = g_regex_new (EPHY_WEB_VIEW_DOMAIN_REGEX,
100                               G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY, NULL);
101   return domain_regex;
102 }
103 
104 static GRegex *
get_domain_regex(void)105 get_domain_regex (void)
106 {
107   static GOnce once_init = G_ONCE_INIT;
108 
109   return g_once (&once_init, create_domain_regex, NULL);
110 }
111 
112 gboolean
ephy_embed_utils_address_has_web_scheme(const char * address)113 ephy_embed_utils_address_has_web_scheme (const char *address)
114 {
115   gboolean has_web_scheme;
116   int colonpos;
117 
118   if (address == NULL)
119     return FALSE;
120 
121   colonpos = (int)((strstr (address, ":")) - address);
122 
123   if (colonpos < 0)
124     return FALSE;
125 
126   has_web_scheme = !(g_ascii_strncasecmp (address, "http", colonpos) &&
127                      g_ascii_strncasecmp (address, "https", colonpos) &&
128                      g_ascii_strncasecmp (address, "file", colonpos) &&
129                      g_ascii_strncasecmp (address, "javascript", colonpos) &&
130                      g_ascii_strncasecmp (address, "data", colonpos) &&
131                      g_ascii_strncasecmp (address, "blob", colonpos) &&
132                      g_ascii_strncasecmp (address, "about", colonpos) &&
133                      g_ascii_strncasecmp (address, "ephy-about", colonpos) &&
134                      g_ascii_strncasecmp (address, "ephy-resource", colonpos) &&
135                      g_ascii_strncasecmp (address, "ephy-source", colonpos) &&
136                      g_ascii_strncasecmp (address, "ephy-reader", colonpos) &&
137                      g_ascii_strncasecmp (address, "ephy-pdf", colonpos) &&
138                      g_ascii_strncasecmp (address, "gopher", colonpos) &&
139                      g_ascii_strncasecmp (address, "inspector", colonpos) &&
140                      g_ascii_strncasecmp (address, "webkit", colonpos));
141 
142   return has_web_scheme;
143 }
144 
145 gboolean
ephy_embed_utils_address_is_existing_absolute_filename(const char * address)146 ephy_embed_utils_address_is_existing_absolute_filename (const char *address)
147 {
148   g_autofree char *real_address = NULL;
149 
150   if (!strchr (address, '#')) {
151     real_address = g_strdup (address);
152   } else {
153     gint pos;
154 
155     pos = g_strstr_len (address, -1, "#") - address;
156     real_address = g_strndup (address, pos);
157   }
158 
159   return g_path_is_absolute (real_address) &&
160          g_file_test (real_address, G_FILE_TEST_EXISTS);
161 }
162 
163 static gboolean
is_public_domain(const char * address)164 is_public_domain (const char *address)
165 {
166   char *host;
167   gboolean retval = FALSE;
168 
169   host = ephy_string_get_host_name (address);
170   if (!host)
171     return FALSE;
172 
173   if (g_regex_match (get_domain_regex (), host, 0, NULL)) {
174     if (!strcmp (host, "localhost"))
175       retval = TRUE;
176     else {
177       const char *end;
178 
179       end = g_strrstr (host, ".");
180       if (end && *end != '\0')
181         retval = soup_tld_domain_is_public_suffix (end);
182     }
183   }
184 
185   g_free (host);
186 
187   return retval;
188 }
189 
190 static gboolean
is_bang_search(const char * address)191 is_bang_search (const char *address)
192 {
193   EphyEmbedShell *shell;
194   EphySearchEngineManager *search_engine_manager;
195   char **bangs;
196   GString *buffer;
197 
198   shell = ephy_embed_shell_get_default ();
199   search_engine_manager = ephy_embed_shell_get_search_engine_manager (shell);
200   bangs = ephy_search_engine_manager_get_bangs (search_engine_manager);
201 
202   for (uint i = 0; bangs[i] != NULL; i++) {
203     buffer = g_string_new (bangs[i]);
204     g_string_append (buffer, " ");
205 
206     if (strstr (address, buffer->str) == address) {
207       g_string_free (buffer, TRUE);
208       g_free (bangs);
209       return TRUE;
210     }
211     g_string_free (buffer, TRUE);
212   }
213   g_free (bangs);
214 
215   return FALSE;
216 }
217 
218 static gboolean
is_host_with_port(const char * address)219 is_host_with_port (const char *address)
220 {
221   g_auto (GStrv) split = NULL;
222   gint64 port = 0;
223 
224   if (strchr (address, ' '))
225     return FALSE;
226 
227   split = g_strsplit (address, ":", -1);
228   if (g_strv_length (split) == 2)
229     port = g_ascii_strtoll (split[1], NULL, 10);
230 
231   return port != 0;
232 }
233 
234 gboolean
ephy_embed_utils_address_is_valid(const char * address)235 ephy_embed_utils_address_is_valid (const char *address)
236 {
237   char *scheme;
238   gboolean retval;
239   GAppInfo *info = NULL;
240 
241   if (!address)
242     return FALSE;
243 
244   scheme = g_uri_parse_scheme (address);
245 
246   if (scheme != NULL) {
247     info = g_app_info_get_default_for_uri_scheme (scheme);
248     g_free (scheme);
249   }
250 
251   retval = info ||
252            ephy_embed_utils_address_is_existing_absolute_filename (address) ||
253            g_regex_match (get_non_search_regex (), address, 0, NULL) ||
254            is_public_domain (address) ||
255            is_bang_search (address) ||
256            is_host_with_port (address);
257 
258   g_clear_object (&info);
259 
260   return retval;
261 }
262 
263 static char *
ensure_host_name_is_lowercase(const char * address)264 ensure_host_name_is_lowercase (const char *address)
265 {
266   g_autofree gchar *host = ephy_string_get_host_name (address);
267   g_autofree gchar *lowercase_host = NULL;
268 
269   if (!host)
270     return g_strdup (address);
271 
272   lowercase_host = g_utf8_strdown (host, -1);
273 
274   if (strcmp (host, lowercase_host) != 0)
275     return ephy_string_find_and_replace (address, host, lowercase_host);
276   else
277     return g_strdup (address);
278 }
279 
280 char *
ephy_embed_utils_normalize_address(const char * input_address)281 ephy_embed_utils_normalize_address (const char *input_address)
282 {
283   char *effective_address = NULL;
284   g_autofree gchar *address = NULL;
285 
286   g_assert (input_address);
287   /* We don't want to lowercase the host name if it's a bang search, as it's not a URI.
288    * It would otherwise lowercase the entire search string, bang included, which is not
289    * what we want. So use input_address directly.
290    */
291   if (is_bang_search (input_address)) {
292     EphyEmbedShell *shell;
293     EphySearchEngineManager *search_engine_manager;
294 
295     shell = ephy_embed_shell_get_default ();
296     search_engine_manager = ephy_embed_shell_get_search_engine_manager (shell);
297     return ephy_search_engine_manager_parse_bang_search (search_engine_manager,
298                                                          input_address);
299   }
300 
301   address = ensure_host_name_is_lowercase (input_address);
302 
303   if (ephy_embed_utils_address_is_existing_absolute_filename (address))
304     return g_strconcat ("file://", address, NULL);
305 
306   if (strcmp (address, "about:gpu") == 0)
307     return g_strdup ("webkit://gpu");
308 
309   if (g_str_has_prefix (address, "about:") && strcmp (address, "about:blank"))
310     return g_strconcat (EPHY_ABOUT_SCHEME, address + strlen ("about"), NULL);
311 
312   if (!ephy_embed_utils_address_has_web_scheme (address)) {
313     const char *scheme;
314 
315     scheme = g_uri_peek_scheme (address);
316 
317     /* Auto-prepend http:// to anything that is not
318      * one according to GLib, because it probably will be
319      * something like "google.com". Special case localhost(:port)
320      * and IP(:port), because GUri, correctly, thinks it is a
321      * URI with scheme being localhost/IP and, optionally, path
322      * being the port. Ideally we should check if we have a
323      * handler for the scheme, and since we'll fail for localhost
324      * and IP, we'd fallback to loading it as a domain. */
325     if (!scheme ||
326         !g_strcmp0 (scheme, "localhost") ||
327         g_hostname_is_ip_address (scheme) ||
328         is_host_with_port (address))
329       effective_address = g_strconcat ("http://", address, NULL);
330   }
331 
332   return effective_address ? effective_address : g_strdup (address);
333 }
334 
335 char *
ephy_embed_utils_autosearch_address(const char * search_key)336 ephy_embed_utils_autosearch_address (const char *search_key)
337 {
338   char *query_param;
339   const char *address_search;
340   char *effective_address;
341   EphyEmbedShell *shell;
342   EphySearchEngineManager *search_engine_manager;
343 
344   if (!g_settings_get_boolean (EPHY_SETTINGS_WEB, EPHY_PREFS_WEB_ENABLE_AUTOSEARCH))
345     return g_strdup (search_key);
346 
347   shell = ephy_embed_shell_get_default ();
348   search_engine_manager = ephy_embed_shell_get_search_engine_manager (shell);
349   address_search = ephy_search_engine_manager_get_default_search_address (search_engine_manager);
350 
351   query_param = soup_form_encode ("q", search_key, NULL);
352 #pragma GCC diagnostic push
353 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
354   /* Format string under control of user input... but gsettings is trusted input. */
355   /* + 2 here is getting rid of 'q=' */
356   effective_address = g_strdup_printf (address_search, query_param + 2);
357 #pragma GCC diagnostic pop
358   g_free (query_param);
359 
360   return effective_address;
361 }
362 
/* Normalizes @address when ephy_embed_utils_address_is_valid() accepts it,
 * otherwise converts it into a search URL.
 *
 * Returns: a newly allocated string produced by whichever helper ran.
 */
char *
ephy_embed_utils_normalize_or_autosearch_address (const char *address)
{
  return ephy_embed_utils_address_is_valid (address)
         ? ephy_embed_utils_normalize_address (address)
         : ephy_embed_utils_autosearch_address (address);
}
371 
372 gboolean
ephy_embed_utils_url_is_empty(const char * location)373 ephy_embed_utils_url_is_empty (const char *location)
374 {
375   return (location == NULL ||
376           location[0] == '\0' ||
377           strcmp (location, "about:blank") == 0 ||
378           strcmp (location, "ephy-about:overview") == 0 ||
379           strcmp (location, "ephy-about:incognito") == 0);
380 }
381 
382 /* This is the list of addresses that should never be shown in the
383  * window's location entry. */
384 static const char *do_not_show_address[] = {
385   "about:blank",
386   "ephy-about:incognito",
387   "ephy-about:overview",
388   NULL
389 };
390 
391 gboolean
ephy_embed_utils_is_no_show_address(const char * address)392 ephy_embed_utils_is_no_show_address (const char *address)
393 {
394   int i;
395 
396   if (!address)
397     return FALSE;
398 
399   for (i = 0; do_not_show_address[i]; i++)
400     if (!strcmp (address, do_not_show_address[i]))
401       return TRUE;
402 
403   if (g_str_has_prefix (address, EPHY_VIEW_SOURCE_SCHEME))
404     return TRUE;
405 
406   return FALSE;
407 }
408 
409 char *
ephy_embed_utils_get_title_from_address(const char * address)410 ephy_embed_utils_get_title_from_address (const char *address)
411 {
412   if (g_str_has_prefix (address, "file://"))
413     return g_strdup (address + 7);
414 
415   if (!strcmp (address, EPHY_ABOUT_SCHEME ":overview") ||
416       !strcmp (address, "about:overview"))
417     return g_strdup (_(OVERVIEW_PAGE_TITLE));
418 
419   return ephy_string_get_host_name (address);
420 }
421 
422 void
ephy_embed_utils_shutdown(void)423 ephy_embed_utils_shutdown (void)
424 {
425   g_clear_pointer (&non_search_regex, g_regex_unref);
426   g_clear_pointer (&domain_regex, g_regex_unref);
427 }
428