1 use std::{
2     collections::HashMap,
3     fs::{create_dir_all, remove_file, symlink_metadata, File},
4     io::prelude::*,
5     net::{IpAddr, ToSocketAddrs},
6     sync::{Arc, RwLock},
7     time::{Duration, SystemTime},
8 };
9 
10 use once_cell::sync::Lazy;
11 use regex::Regex;
12 use reqwest::{blocking::Client, blocking::Response, header};
13 use rocket::{http::ContentType, response::Content, Route};
14 
15 use crate::{
16     error::Error,
17     util::{get_reqwest_client_builder, Cached},
18     CONFIG,
19 };
20 
/// Returns the Rocket routes exposed by this module.
///
/// Currently this is only the `icon` handler defined in this file.
pub fn routes() -> Vec<Route> {
    routes![icon]
}
24 
25 static CLIENT: Lazy<Client> = Lazy::new(|| {
26     // Generate the default headers
27     let mut default_headers = header::HeaderMap::new();
28     default_headers
29         .insert(header::USER_AGENT, header::HeaderValue::from_static("Links (2.22; Linux X86_64; GNU C; text)"));
30     default_headers
31         .insert(header::ACCEPT, header::HeaderValue::from_static("text/html, text/*;q=0.5, image/*, */*;q=0.1"));
32     default_headers.insert(header::ACCEPT_LANGUAGE, header::HeaderValue::from_static("en,*;q=0.1"));
33     default_headers.insert(header::CACHE_CONTROL, header::HeaderValue::from_static("no-cache"));
34     default_headers.insert(header::PRAGMA, header::HeaderValue::from_static("no-cache"));
35 
36     // Reuse the client between requests
37     get_reqwest_client_builder()
38         .cookie_provider(Arc::new(Jar::default()))
39         .timeout(Duration::from_secs(CONFIG.icon_download_timeout()))
40         .default_headers(default_headers)
41         .build()
42         .expect("Failed to build icon client")
43 });
44 
45 // Build Regex only once since this takes a lot of time.
46 static ICON_REL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)icon$|apple.*icon").unwrap());
47 static ICON_REL_BLACKLIST: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)mask-icon").unwrap());
48 static ICON_SIZE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?x)(\d+)\D*(\d+)").unwrap());
49 
50 // Special HashMap which holds the user defined Regex to speedup matching the regex.
51 static ICON_BLACKLIST_REGEX: Lazy<RwLock<HashMap<String, Regex>>> = Lazy::new(|| RwLock::new(HashMap::new()));
52 
53 #[get("/<domain>/icon.png")]
icon(domain: String) -> Cached<Content<Vec<u8>>>54 fn icon(domain: String) -> Cached<Content<Vec<u8>>> {
55     const FALLBACK_ICON: &[u8] = include_bytes!("../static/images/fallback-icon.png");
56 
57     if !is_valid_domain(&domain) {
58         warn!("Invalid domain: {}", domain);
59         return Cached::ttl(
60             Content(ContentType::new("image", "png"), FALLBACK_ICON.to_vec()),
61             CONFIG.icon_cache_negttl(),
62         );
63     }
64 
65     match get_icon(&domain) {
66         Some((icon, icon_type)) => {
67             Cached::ttl(Content(ContentType::new("image", icon_type), icon), CONFIG.icon_cache_ttl())
68         }
69         _ => Cached::ttl(Content(ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), CONFIG.icon_cache_negttl()),
70     }
71 }
72 
73 /// Returns if the domain provided is valid or not.
74 ///
75 /// This does some manual checks and makes use of Url to do some basic checking.
76 /// domains can't be larger then 63 characters (not counting multiple subdomains) according to the RFC's, but we limit the total size to 255.
is_valid_domain(domain: &str) -> bool77 fn is_valid_domain(domain: &str) -> bool {
78     const ALLOWED_CHARS: &str = "_-.";
79 
80     // If parsing the domain fails using Url, it will not work with reqwest.
81     if let Err(parse_error) = url::Url::parse(format!("https://{}", domain).as_str()) {
82         debug!("Domain parse error: '{}' - {:?}", domain, parse_error);
83         return false;
84     } else if domain.is_empty()
85         || domain.contains("..")
86         || domain.starts_with('.')
87         || domain.starts_with('-')
88         || domain.ends_with('-')
89     {
90         debug!(
91             "Domain validation error: '{}' is either empty, contains '..', starts with an '.', starts or ends with a '-'",
92             domain
93         );
94         return false;
95     } else if domain.len() > 255 {
96         debug!("Domain validation error: '{}' exceeds 255 characters", domain);
97         return false;
98     }
99 
100     for c in domain.chars() {
101         if !c.is_alphanumeric() && !ALLOWED_CHARS.contains(c) {
102             debug!("Domain validation error: '{}' contains an invalid character '{}'", domain, c);
103             return false;
104         }
105     }
106 
107     true
108 }
109 
/// Returns whether `ip` is considered a globally routable address.
///
/// TODO: This is extracted from IpAddr::is_global, which is unstable:
/// https://doc.rust-lang.org/nightly/std/net/enum.IpAddr.html#method.is_global
/// Remove once https://github.com/rust-lang/rust/issues/27709 is merged
#[allow(clippy::nonminimal_bool)]
#[cfg(not(feature = "unstable"))]
fn is_global(ip: IpAddr) -> bool {
    match ip {
        IpAddr::V4(v4) => {
            let [a, b, c, d] = v4.octets();

            // 192.0.0.9 and 192.0.0.10 are the only two globally routable
            // addresses in the 192.0.0.0/24 range.
            if [a, b, c] == [192, 0, 0] && (d == 9 || d == 10) {
                return true;
            }

            // 100.64.0.0/10
            let shared = a == 100 && (b & 0b1100_0000) == 0b0100_0000;
            // 192.0.0.0/24
            let protocol_assignment = a == 192 && b == 0 && c == 0;
            // 240.0.0.0/4, the broadcast address is handled separately
            let reserved = a & 240 == 240 && !v4.is_broadcast();
            // 198.18.0.0/15
            let benchmarking = a == 198 && (b & 0xfe) == 18;
            // 0.0.0.0/8
            let this_network = a == 0;

            let non_global = v4.is_private()
                || v4.is_loopback()
                || v4.is_link_local()
                || v4.is_broadcast()
                || v4.is_documentation()
                || shared
                || protocol_assignment
                || reserved
                || benchmarking
                || this_network;
            !non_global
        }
        IpAddr::V6(v6) => {
            let segments = v6.segments();
            let first = segments[0];

            // A multicast address is only global when its scope nibble is 14.
            if v6.is_multicast() {
                return first & 0x000f == 14;
            }

            // fe80::/10
            let unicast_link_local = first & 0xffc0 == 0xfe80;
            // fc00::/7
            let unique_local = first & 0xfe00 == 0xfc00;
            // 2001:db8::/32
            let documentation = first == 0x2001 && segments[1] == 0xdb8;

            let non_global =
                v6.is_loopback() || unicast_link_local || unique_local || v6.is_unspecified() || documentation;
            !non_global
        }
    }
}
149 
/// Returns whether `ip` is a globally routable address, delegating to the
/// standard library's `IpAddr::is_global` when the `unstable` feature is enabled.
#[cfg(feature = "unstable")]
fn is_global(ip: IpAddr) -> bool {
    IpAddr::is_global(&ip)
}
154 
/// These are some tests to check that the local `is_global` matches `IpAddr::is_global`.
/// The IPv4 space can be checked completely in 5 mins or so; it was correct as of nightly 2020-07-11.
/// The IPv6 space can't be checked in a reasonable time, so about ten billion random addresses are checked.
/// Note that the is_global implementation is subject to change as new IP RFCs are created
///
/// To run while showing progress output:
/// cargo test --features sqlite,unstable -- --nocapture --ignored
#[cfg(test)]
#[cfg(feature = "unstable")]
mod tests {
    use super::*;

    #[test]
    #[ignore]
    fn test_ipv4_global() {
        // Inclusive ranges are required: `0..u8::MAX` would skip every address
        // containing an octet of 255 (including the broadcast address).
        for a in 0..=u8::MAX {
            println!("Iter: {}/255", a);
            for b in 0..=u8::MAX {
                for c in 0..=u8::MAX {
                    for d in 0..=u8::MAX {
                        let ip = IpAddr::V4(std::net::Ipv4Addr::new(a, b, c, d));
                        assert_eq!(ip.is_global(), is_global(ip), "mismatch for {}", ip)
                    }
                }
            }
        }
    }

    #[test]
    #[ignore]
    fn test_ipv6_global() {
        use ring::rand::{SecureRandom, SystemRandom};
        let mut v = [0u8; 16];
        let rand = SystemRandom::new();
        for i in 0..1_000 {
            println!("Iter: {}/1_000", i);
            for _ in 0..10_000_000 {
                rand.fill(&mut v).expect("Error generating random values");
                let ip = IpAddr::V6(std::net::Ipv6Addr::new(
                    (v[14] as u16) << 8 | v[15] as u16,
                    (v[12] as u16) << 8 | v[13] as u16,
                    (v[10] as u16) << 8 | v[11] as u16,
                    (v[8] as u16) << 8 | v[9] as u16,
                    (v[6] as u16) << 8 | v[7] as u16,
                    (v[4] as u16) << 8 | v[5] as u16,
                    (v[2] as u16) << 8 | v[3] as u16,
                    (v[0] as u16) << 8 | v[1] as u16,
                ));
                assert_eq!(ip.is_global(), is_global(ip), "mismatch for {}", ip)
            }
        }
    }
}
208 
is_domain_blacklisted(domain: &str) -> bool209 fn is_domain_blacklisted(domain: &str) -> bool {
210     let mut is_blacklisted = CONFIG.icon_blacklist_non_global_ips()
211         && (domain, 0)
212             .to_socket_addrs()
213             .map(|x| {
214                 for ip_port in x {
215                     if !is_global(ip_port.ip()) {
216                         warn!("IP {} for domain '{}' is not a global IP!", ip_port.ip(), domain);
217                         return true;
218                     }
219                 }
220                 false
221             })
222             .unwrap_or(false);
223 
224     // Skip the regex check if the previous one is true already
225     if !is_blacklisted {
226         if let Some(blacklist) = CONFIG.icon_blacklist_regex() {
227             let mut regex_hashmap = ICON_BLACKLIST_REGEX.read().unwrap();
228 
229             // Use the pre-generate Regex stored in a Lazy HashMap if there's one, else generate it.
230             let regex = if let Some(regex) = regex_hashmap.get(&blacklist) {
231                 regex
232             } else {
233                 drop(regex_hashmap);
234 
235                 let mut regex_hashmap_write = ICON_BLACKLIST_REGEX.write().unwrap();
236                 // Clear the current list if the previous key doesn't exists.
237                 // To prevent growing of the HashMap after someone has changed it via the admin interface.
238                 if regex_hashmap_write.len() >= 1 {
239                     regex_hashmap_write.clear();
240                 }
241 
242                 // Generate the regex to store in too the Lazy Static HashMap.
243                 let blacklist_regex = Regex::new(&blacklist).unwrap();
244                 regex_hashmap_write.insert(blacklist.to_string(), blacklist_regex);
245                 drop(regex_hashmap_write);
246 
247                 regex_hashmap = ICON_BLACKLIST_REGEX.read().unwrap();
248                 regex_hashmap.get(&blacklist).unwrap()
249             };
250 
251             // Use the pre-generate Regex stored in a Lazy HashMap.
252             if regex.is_match(domain) {
253                 warn!("Blacklisted domain: {} matched ICON_BLACKLIST_REGEX", domain);
254                 is_blacklisted = true;
255             }
256         }
257     }
258 
259     is_blacklisted
260 }
261 
get_icon(domain: &str) -> Option<(Vec<u8>, String)>262 fn get_icon(domain: &str) -> Option<(Vec<u8>, String)> {
263     let path = format!("{}/{}.png", CONFIG.icon_cache_folder(), domain);
264 
265     // Check for expiration of negatively cached copy
266     if icon_is_negcached(&path) {
267         return None;
268     }
269 
270     if let Some(icon) = get_cached_icon(&path) {
271         let icon_type = match get_icon_type(&icon) {
272             Some(x) => x,
273             _ => "x-icon",
274         };
275         return Some((icon, icon_type.to_string()));
276     }
277 
278     if CONFIG.disable_icon_download() {
279         return None;
280     }
281 
282     // Get the icon, or None in case of error
283     match download_icon(domain) {
284         Ok((icon, icon_type)) => {
285             save_icon(&path, &icon);
286             Some((icon, icon_type.unwrap_or("x-icon").to_string()))
287         }
288         Err(e) => {
289             error!("Error downloading icon: {:?}", e);
290             let miss_indicator = path + ".miss";
291             save_icon(&miss_indicator, &[]);
292             None
293         }
294     }
295 }
296 
get_cached_icon(path: &str) -> Option<Vec<u8>>297 fn get_cached_icon(path: &str) -> Option<Vec<u8>> {
298     // Check for expiration of successfully cached copy
299     if icon_is_expired(path) {
300         return None;
301     }
302 
303     // Try to read the cached icon, and return it if it exists
304     if let Ok(mut f) = File::open(path) {
305         let mut buffer = Vec::new();
306 
307         if f.read_to_end(&mut buffer).is_ok() {
308             return Some(buffer);
309         }
310     }
311 
312     None
313 }
314 
file_is_expired(path: &str, ttl: u64) -> Result<bool, Error>315 fn file_is_expired(path: &str, ttl: u64) -> Result<bool, Error> {
316     let meta = symlink_metadata(path)?;
317     let modified = meta.modified()?;
318     let age = SystemTime::now().duration_since(modified)?;
319 
320     Ok(ttl > 0 && ttl <= age.as_secs())
321 }
322 
icon_is_negcached(path: &str) -> bool323 fn icon_is_negcached(path: &str) -> bool {
324     let miss_indicator = path.to_owned() + ".miss";
325     let expired = file_is_expired(&miss_indicator, CONFIG.icon_cache_negttl());
326 
327     match expired {
328         // No longer negatively cached, drop the marker
329         Ok(true) => {
330             if let Err(e) = remove_file(&miss_indicator) {
331                 error!("Could not remove negative cache indicator for icon {:?}: {:?}", path, e);
332             }
333             false
334         }
335         // The marker hasn't expired yet.
336         Ok(false) => true,
337         // The marker is missing or inaccessible in some way.
338         Err(_) => false,
339     }
340 }
341 
icon_is_expired(path: &str) -> bool342 fn icon_is_expired(path: &str) -> bool {
343     let expired = file_is_expired(path, CONFIG.icon_cache_ttl());
344     expired.unwrap_or(true)
345 }
346 
/// A candidate icon location found for a domain.
struct Icon {
    /// Ranking of this candidate; the icon list is sorted ascending on this value.
    priority: u8,
    /// Location of the icon.
    href: String,
}

impl Icon {
    /// Creates a new icon candidate from its priority and href.
    const fn new(priority: u8, href: String) -> Self {
        Self { priority, href }
    }
}
360 
361 /// Iterates over the HTML document to find <base href="http://domain.tld">
362 /// When found it will stop the iteration and the found base href will be shared deref via `base_href`.
363 ///
364 /// # Arguments
365 /// * `node` - A Parsed HTML document via html5ever::parse_document()
366 /// * `base_href` - a mutable url::Url which will be overwritten when a base href tag has been found.
367 ///
get_base_href(node: &std::rc::Rc<markup5ever_rcdom::Node>, base_href: &mut url::Url) -> bool368 fn get_base_href(node: &std::rc::Rc<markup5ever_rcdom::Node>, base_href: &mut url::Url) -> bool {
369     if let markup5ever_rcdom::NodeData::Element {
370         name,
371         attrs,
372         ..
373     } = &node.data
374     {
375         if name.local.as_ref() == "base" {
376             let attrs = attrs.borrow();
377             for attr in attrs.iter() {
378                 let attr_name = attr.name.local.as_ref();
379                 let attr_value = attr.value.as_ref();
380 
381                 if attr_name == "href" {
382                     debug!("Found base href: {}", attr_value);
383                     *base_href = match base_href.join(attr_value) {
384                         Ok(href) => href,
385                         _ => base_href.clone(),
386                     };
387                     return true;
388                 }
389             }
390             return true;
391         }
392     }
393 
394     // TODO: Might want to limit the recursion depth?
395     for child in node.children.borrow().iter() {
396         // Check if we got a true back and stop the iter.
397         // This means we found a <base> tag and can stop processing the html.
398         if get_base_href(child, base_href) {
399             return true;
400         }
401     }
402     false
403 }
404 
get_favicons_node(node: &std::rc::Rc<markup5ever_rcdom::Node>, icons: &mut Vec<Icon>, url: &url::Url)405 fn get_favicons_node(node: &std::rc::Rc<markup5ever_rcdom::Node>, icons: &mut Vec<Icon>, url: &url::Url) {
406     if let markup5ever_rcdom::NodeData::Element {
407         name,
408         attrs,
409         ..
410     } = &node.data
411     {
412         if name.local.as_ref() == "link" {
413             let mut has_rel = false;
414             let mut href = None;
415             let mut sizes = None;
416 
417             let attrs = attrs.borrow();
418             for attr in attrs.iter() {
419                 let attr_name = attr.name.local.as_ref();
420                 let attr_value = attr.value.as_ref();
421 
422                 if attr_name == "rel" && ICON_REL_REGEX.is_match(attr_value) && !ICON_REL_BLACKLIST.is_match(attr_value)
423                 {
424                     has_rel = true;
425                 } else if attr_name == "href" {
426                     href = Some(attr_value);
427                 } else if attr_name == "sizes" {
428                     sizes = Some(attr_value);
429                 }
430             }
431 
432             if has_rel {
433                 if let Some(inner_href) = href {
434                     if let Ok(full_href) = url.join(inner_href).map(String::from) {
435                         let priority = get_icon_priority(&full_href, sizes);
436                         icons.push(Icon::new(priority, full_href));
437                     }
438                 }
439             }
440         }
441     }
442 
443     // TODO: Might want to limit the recursion depth?
444     for child in node.children.borrow().iter() {
445         get_favicons_node(child, icons, url);
446     }
447 }
448 
/// Result of looking up the icon candidates for a domain.
struct IconUrlResult {
    /// Icon candidates.
    iconlist: Vec<Icon>,
    /// Value to send as the `Referer` header when downloading one of the icons.
    referer: String,
}
453 
454 /// Returns a IconUrlResult which holds a Vector IconList and a string which holds the referer.
455 /// There will always two items within the iconlist which holds http(s)://domain.tld/favicon.ico.
456 /// This does not mean that that location does exists, but it is the default location browser use.
457 ///
458 /// # Argument
459 /// * `domain` - A string which holds the domain with extension.
460 ///
461 /// # Example
462 /// ```
463 /// let icon_result = get_icon_url("github.com")?;
464 /// let icon_result = get_icon_url("vaultwarden.discourse.group")?;
465 /// ```
get_icon_url(domain: &str) -> Result<IconUrlResult, Error>466 fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
467     // Default URL with secure and insecure schemes
468     let ssldomain = format!("https://{}", domain);
469     let httpdomain = format!("http://{}", domain);
470 
471     // First check the domain as given during the request for both HTTPS and HTTP.
472     let resp = match get_page(&ssldomain).or_else(|_| get_page(&httpdomain)) {
473         Ok(c) => Ok(c),
474         Err(e) => {
475             let mut sub_resp = Err(e);
476 
477             // When the domain is not an IP, and has more then one dot, remove all subdomains.
478             let is_ip = domain.parse::<IpAddr>();
479             if is_ip.is_err() && domain.matches('.').count() > 1 {
480                 let mut domain_parts = domain.split('.');
481                 let base_domain = format!(
482                     "{base}.{tld}",
483                     tld = domain_parts.next_back().unwrap(),
484                     base = domain_parts.next_back().unwrap()
485                 );
486                 if is_valid_domain(&base_domain) {
487                     let sslbase = format!("https://{}", base_domain);
488                     let httpbase = format!("http://{}", base_domain);
489                     debug!("[get_icon_url]: Trying without subdomains '{}'", base_domain);
490 
491                     sub_resp = get_page(&sslbase).or_else(|_| get_page(&httpbase));
492                 }
493 
494             // When the domain is not an IP, and has less then 2 dots, try to add www. infront of it.
495             } else if is_ip.is_err() && domain.matches('.').count() < 2 {
496                 let www_domain = format!("www.{}", domain);
497                 if is_valid_domain(&www_domain) {
498                     let sslwww = format!("https://{}", www_domain);
499                     let httpwww = format!("http://{}", www_domain);
500                     debug!("[get_icon_url]: Trying with www. prefix '{}'", www_domain);
501 
502                     sub_resp = get_page(&sslwww).or_else(|_| get_page(&httpwww));
503                 }
504             }
505 
506             sub_resp
507         }
508     };
509 
510     // Create the iconlist
511     let mut iconlist: Vec<Icon> = Vec::new();
512     let mut referer = String::from("");
513 
514     if let Ok(content) = resp {
515         // Extract the URL from the respose in case redirects occured (like @ gitlab.com)
516         let url = content.url().clone();
517 
518         // Set the referer to be used on the final request, some sites check this.
519         // Mostly used to prevent direct linking and other security resons.
520         referer = url.as_str().to_string();
521 
522         // Add the default favicon.ico to the list with the domain the content responded from.
523         iconlist.push(Icon::new(35, String::from(url.join("/favicon.ico").unwrap())));
524 
525         // 384KB should be more than enough for the HTML, though as we only really need the HTML header.
526         let mut limited_reader = content.take(384 * 1024);
527 
528         use html5ever::tendril::TendrilSink;
529         let dom = html5ever::parse_document(markup5ever_rcdom::RcDom::default(), Default::default())
530             .from_utf8()
531             .read_from(&mut limited_reader)?;
532 
533         let mut base_url: url::Url = url;
534         get_base_href(&dom.document, &mut base_url);
535         get_favicons_node(&dom.document, &mut iconlist, &base_url);
536     } else {
537         // Add the default favicon.ico to the list with just the given domain
538         iconlist.push(Icon::new(35, format!("{}/favicon.ico", ssldomain)));
539         iconlist.push(Icon::new(35, format!("{}/favicon.ico", httpdomain)));
540     }
541 
542     // Sort the iconlist by priority
543     iconlist.sort_by_key(|x| x.priority);
544 
545     // There always is an icon in the list, so no need to check if it exists, and just return the first one
546     Ok(IconUrlResult {
547         iconlist,
548         referer,
549     })
550 }
551 
/// Fetches `url` without sending a referer.
///
/// This is a shorthand for `get_page_with_referer(url, "")`.
fn get_page(url: &str) -> Result<Response, Error> {
    get_page_with_referer(url, "")
}
555 
get_page_with_referer(url: &str, referer: &str) -> Result<Response, Error>556 fn get_page_with_referer(url: &str, referer: &str) -> Result<Response, Error> {
557     if is_domain_blacklisted(url::Url::parse(url).unwrap().host_str().unwrap_or_default()) {
558         err!("Favicon resolves to a blacklisted domain or IP!", url);
559     }
560 
561     let mut client = CLIENT.get(url);
562     if !referer.is_empty() {
563         client = client.header("Referer", referer)
564     }
565 
566     match client.send() {
567         Ok(c) => c.error_for_status().map_err(Into::into),
568         Err(e) => err_silent!(format!("{}", e)),
569     }
570 }
571 
572 /// Returns a Integer with the priority of the type of the icon which to prefer.
573 /// The lower the number the better.
574 ///
575 /// # Arguments
576 /// * `href`  - A string which holds the href value or relative path.
577 /// * `sizes` - The size of the icon if available as a <width>x<height> value like 32x32.
578 ///
579 /// # Example
580 /// ```
581 /// priority1 = get_icon_priority("http://example.com/path/to/a/favicon.png", "32x32");
582 /// priority2 = get_icon_priority("https://example.com/path/to/a/favicon.ico", "");
583 /// ```
get_icon_priority(href: &str, sizes: Option<&str>) -> u8584 fn get_icon_priority(href: &str, sizes: Option<&str>) -> u8 {
585     // Check if there is a dimension set
586     let (width, height) = parse_sizes(sizes);
587 
588     // Check if there is a size given
589     if width != 0 && height != 0 {
590         // Only allow square dimensions
591         if width == height {
592             // Change priority by given size
593             if width == 32 {
594                 1
595             } else if width == 64 {
596                 2
597             } else if (24..=192).contains(&width) {
598                 3
599             } else if width == 16 {
600                 4
601             } else {
602                 5
603             }
604         // There are dimensions available, but the image is not a square
605         } else {
606             200
607         }
608     } else {
609         // Change priority by file extension
610         if href.ends_with(".png") {
611             10
612         } else if href.ends_with(".jpg") || href.ends_with(".jpeg") {
613             20
614         } else {
615             30
616         }
617     }
618 }
619 
620 /// Returns a Tuple with the width and hight as a seperate value extracted from the sizes attribute
621 /// It will return 0 for both values if no match has been found.
622 ///
623 /// # Arguments
624 /// * `sizes` - The size of the icon if available as a <width>x<height> value like 32x32.
625 ///
626 /// # Example
627 /// ```
628 /// let (width, height) = parse_sizes("64x64"); // (64, 64)
629 /// let (width, height) = parse_sizes("x128x128"); // (128, 128)
630 /// let (width, height) = parse_sizes("32"); // (0, 0)
631 /// ```
parse_sizes(sizes: Option<&str>) -> (u16, u16)632 fn parse_sizes(sizes: Option<&str>) -> (u16, u16) {
633     let mut width: u16 = 0;
634     let mut height: u16 = 0;
635 
636     if let Some(sizes) = sizes {
637         match ICON_SIZE_REGEX.captures(sizes.trim()) {
638             None => {}
639             Some(dimensions) => {
640                 if dimensions.len() >= 3 {
641                     width = dimensions[1].parse::<u16>().unwrap_or_default();
642                     height = dimensions[2].parse::<u16>().unwrap_or_default();
643                 }
644             }
645         }
646     }
647 
648     (width, height)
649 }
650 
download_icon(domain: &str) -> Result<(Vec<u8>, Option<&str>), Error>651 fn download_icon(domain: &str) -> Result<(Vec<u8>, Option<&str>), Error> {
652     if is_domain_blacklisted(domain) {
653         err_silent!("Domain is blacklisted", domain)
654     }
655 
656     let icon_result = get_icon_url(domain)?;
657 
658     let mut buffer = Vec::new();
659     let mut icon_type: Option<&str> = None;
660 
661     use data_url::DataUrl;
662 
663     for icon in icon_result.iconlist.iter().take(5) {
664         if icon.href.starts_with("data:image") {
665             let datauri = DataUrl::process(&icon.href).unwrap();
666             // Check if we are able to decode the data uri
667             match datauri.decode_to_vec() {
668                 Ok((body, _fragment)) => {
669                     // Also check if the size is atleast 67 bytes, which seems to be the smallest png i could create
670                     if body.len() >= 67 {
671                         // Check if the icon type is allowed, else try an icon from the list.
672                         icon_type = get_icon_type(&body);
673                         if icon_type.is_none() {
674                             debug!("Icon from {} data:image uri, is not a valid image type", domain);
675                             continue;
676                         }
677                         info!("Extracted icon from data:image uri for {}", domain);
678                         buffer = body;
679                         break;
680                     }
681                 }
682                 _ => debug!("Extracted icon from data:image uri is invalid"),
683             };
684         } else {
685             match get_page_with_referer(&icon.href, &icon_result.referer) {
686                 Ok(mut res) => {
687                     res.copy_to(&mut buffer)?;
688                     // Check if the icon type is allowed, else try an icon from the list.
689                     icon_type = get_icon_type(&buffer);
690                     if icon_type.is_none() {
691                         buffer.clear();
692                         debug!("Icon from {}, is not a valid image type", icon.href);
693                         continue;
694                     }
695                     info!("Downloaded icon from {}", icon.href);
696                     break;
697                 }
698                 Err(e) => debug!("{:?}", e),
699             };
700         }
701     }
702 
703     if buffer.is_empty() {
704         err_silent!("Empty response or unable find a valid icon", domain);
705     }
706 
707     Ok((buffer, icon_type))
708 }
709 
save_icon(path: &str, icon: &[u8])710 fn save_icon(path: &str, icon: &[u8]) {
711     match File::create(path) {
712         Ok(mut f) => {
713             f.write_all(icon).expect("Error writing icon file");
714         }
715         Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => {
716             create_dir_all(&CONFIG.icon_cache_folder()).expect("Error creating icon cache");
717         }
718         Err(e) => {
719             warn!("Icon save error: {:?}", e);
720         }
721     }
722 }
723 
/// Detects the image type from the leading magic bytes of `bytes`.
///
/// Returns the image subtype (`png`, `x-icon`, `webp`, `jpeg`, `gif` or `bmp`),
/// or `None` when the content is not one of the supported image types.
fn get_icon_type(bytes: &[u8]) -> Option<&'static str> {
    match bytes {
        [0x89, b'P', b'N', b'G', ..] => Some("png"),
        [0, 0, 1, 0, ..] => Some("x-icon"),
        // RIFF is a generic container which is also used by e.g. WAV and AVI files,
        // so the form type after the 4 byte chunk size has to be checked as well.
        [b'R', b'I', b'F', b'F', _, _, _, _, b'W', b'E', b'B', b'P', ..] => Some("webp"),
        [0xFF, 0xD8, 0xFF, ..] => Some("jpeg"),
        [b'G', b'I', b'F', b'8', ..] => Some("gif"),
        [b'B', b'M', ..] => Some("bmp"),
        _ => None,
    }
}
735 
736 /// This is an implementation of the default Cookie Jar from Reqwest and reqwest_cookie_store build by pfernie.
737 /// The default cookie jar used by Reqwest keeps all the cookies based upon the Max-Age or Expires which could be a long time.
738 /// That could be used for tracking, to prevent this we force the lifespan of the cookies to always be max two minutes.
739 /// A Cookie Jar is needed because some sites force a redirect with cookies to verify if a request uses cookies or not.
740 use cookie_store::CookieStore;
741 #[derive(Default)]
742 pub struct Jar(RwLock<CookieStore>);
743 
744 impl reqwest::cookie::CookieStore for Jar {
set_cookies(&self, cookie_headers: &mut dyn Iterator<Item = &header::HeaderValue>, url: &url::Url)745     fn set_cookies(&self, cookie_headers: &mut dyn Iterator<Item = &header::HeaderValue>, url: &url::Url) {
746         use cookie::{Cookie as RawCookie, ParseError as RawCookieParseError};
747         use time::Duration;
748 
749         let mut cookie_store = self.0.write().unwrap();
750         let cookies = cookie_headers.filter_map(|val| {
751             std::str::from_utf8(val.as_bytes())
752                 .map_err(RawCookieParseError::from)
753                 .and_then(RawCookie::parse)
754                 .map(|mut c| {
755                     c.set_expires(None);
756                     c.set_max_age(Some(Duration::minutes(2)));
757                     c.into_owned()
758                 })
759                 .ok()
760         });
761         cookie_store.store_response_cookies(cookies, url);
762     }
763 
cookies(&self, url: &url::Url) -> Option<header::HeaderValue>764     fn cookies(&self, url: &url::Url) -> Option<header::HeaderValue> {
765         use bytes::Bytes;
766 
767         let cookie_store = self.0.read().unwrap();
768         let s = cookie_store
769             .get_request_values(url)
770             .map(|(name, value)| format!("{}={}", name, value))
771             .collect::<Vec<_>>()
772             .join("; ");
773 
774         if s.is_empty() {
775             return None;
776         }
777 
778         header::HeaderValue::from_maybe_shared(Bytes::from(s)).ok()
779     }
780 }
781