1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 use std::cmp;
10 use std::fmt::{self, Formatter};
11 use std::net::{Ipv4Addr, Ipv6Addr};
12 
13 use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
14 #[cfg(feature = "serde")]
15 use serde::{Deserialize, Serialize};
16 
17 use crate::parser::{ParseError, ParseResult};
18 
/// Crate-internal, `Copy` summary of which kind of host a URL has.
///
/// Unlike [`Host`], the `Domain` variant carries no string, so this type only
/// records the host's kind plus the address for IP hosts.
// NOTE(review): presumably the domain text itself lives in the URL's
// serialization and is looked up there — confirm against the callers.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum HostInternal {
    /// No host. The `From<Host<String>>` conversion produces this for an
    /// empty domain string.
    None,
    /// A non-empty domain name (the name is not stored here).
    Domain,
    /// An IPv4 address.
    Ipv4(Ipv4Addr),
    /// An IPv6 address.
    Ipv6(Ipv6Addr),
}
27 
28 impl From<Host<String>> for HostInternal {
from(host: Host<String>) -> HostInternal29     fn from(host: Host<String>) -> HostInternal {
30         match host {
31             Host::Domain(ref s) if s.is_empty() => HostInternal::None,
32             Host::Domain(_) => HostInternal::Domain,
33             Host::Ipv4(address) => HostInternal::Ipv4(address),
34             Host::Ipv6(address) => HostInternal::Ipv6(address),
35         }
36     }
37 }
38 
/// The host name of a URL.
///
/// The type parameter `S` is the string type that holds a domain name and
/// defaults to `String`. `Host<&str>` is the borrowed form; its `to_owned`
/// method converts it into a `Host<String>`.
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[derive(Clone, Debug, Eq, Ord, PartialOrd, Hash)]
pub enum Host<S = String> {
    /// A DNS domain name, as '.' dot-separated labels.
    /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
    /// a special URL, or percent encoded for non-special URLs. Hosts for
    /// non-special URLs are also called opaque hosts.
    Domain(S),

    /// An IPv4 address.
    /// `Url::host_str` returns the serialization of this address,
    /// as four decimal integers separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
    /// in the format per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),
}
61 
62 impl<'a> Host<&'a str> {
63     /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
to_owned(&self) -> Host<String>64     pub fn to_owned(&self) -> Host<String> {
65         match *self {
66             Host::Domain(domain) => Host::Domain(domain.to_owned()),
67             Host::Ipv4(address) => Host::Ipv4(address),
68             Host::Ipv6(address) => Host::Ipv6(address),
69         }
70     }
71 }
72 
73 impl Host<String> {
74     /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
75     ///
76     /// <https://url.spec.whatwg.org/#host-parsing>
parse(input: &str) -> Result<Self, ParseError>77     pub fn parse(input: &str) -> Result<Self, ParseError> {
78         if input.starts_with('[') {
79             if !input.ends_with(']') {
80                 return Err(ParseError::InvalidIpv6Address);
81             }
82             return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
83         }
84         let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
85         let domain = idna::domain_to_ascii(&domain)?;
86         if domain.is_empty() {
87             return Err(ParseError::EmptyHost);
88         }
89 
90         let is_invalid_domain_char = |c| {
91             matches!(
92                 c,
93                 '\0' | '\t'
94                     | '\n'
95                     | '\r'
96                     | ' '
97                     | '#'
98                     | '%'
99                     | '/'
100                     | ':'
101                     | '<'
102                     | '>'
103                     | '?'
104                     | '@'
105                     | '['
106                     | '\\'
107                     | ']'
108                     | '^'
109             )
110         };
111 
112         if domain.find(is_invalid_domain_char).is_some() {
113             Err(ParseError::InvalidDomainCharacter)
114         } else if let Some(address) = parse_ipv4addr(&domain)? {
115             Ok(Host::Ipv4(address))
116         } else {
117             Ok(Host::Domain(domain))
118         }
119     }
120 
121     // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
parse_opaque(input: &str) -> Result<Self, ParseError>122     pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
123         if input.starts_with('[') {
124             if !input.ends_with(']') {
125                 return Err(ParseError::InvalidIpv6Address);
126             }
127             return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
128         }
129 
130         let is_invalid_host_char = |c| {
131             matches!(
132                 c,
133                 '\0' | '\t'
134                     | '\n'
135                     | '\r'
136                     | ' '
137                     | '#'
138                     | '/'
139                     | ':'
140                     | '<'
141                     | '>'
142                     | '?'
143                     | '@'
144                     | '['
145                     | '\\'
146                     | ']'
147                     | '^'
148             )
149         };
150 
151         if input.find(is_invalid_host_char).is_some() {
152             Err(ParseError::InvalidDomainCharacter)
153         } else {
154             Ok(Host::Domain(
155                 utf8_percent_encode(input, CONTROLS).to_string(),
156             ))
157         }
158     }
159 }
160 
161 impl<S: AsRef<str>> fmt::Display for Host<S> {
fmt(&self, f: &mut Formatter<'_>) -> fmt::Result162     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
163         match *self {
164             Host::Domain(ref domain) => domain.as_ref().fmt(f),
165             Host::Ipv4(ref addr) => addr.fmt(f),
166             Host::Ipv6(ref addr) => {
167                 f.write_str("[")?;
168                 write_ipv6(addr, f)?;
169                 f.write_str("]")
170             }
171         }
172     }
173 }
174 
175 impl<S, T> PartialEq<Host<T>> for Host<S>
176 where
177     S: PartialEq<T>,
178 {
eq(&self, other: &Host<T>) -> bool179     fn eq(&self, other: &Host<T>) -> bool {
180         match (self, other) {
181             (Host::Domain(a), Host::Domain(b)) => a == b,
182             (Host::Ipv4(a), Host::Ipv4(b)) => a == b,
183             (Host::Ipv6(a), Host::Ipv6(b)) => a == b,
184             (_, _) => false,
185         }
186     }
187 }
188 
write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result189 fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter<'_>) -> fmt::Result {
190     let segments = addr.segments();
191     let (compress_start, compress_end) = longest_zero_sequence(&segments);
192     let mut i = 0;
193     while i < 8 {
194         if i == compress_start {
195             f.write_str(":")?;
196             if i == 0 {
197                 f.write_str(":")?;
198             }
199             if compress_end < 8 {
200                 i = compress_end;
201             } else {
202                 break;
203             }
204         }
205         write!(f, "{:x}", segments[i as usize])?;
206         if i < 7 {
207             f.write_str(":")?;
208         }
209         i += 1;
210     }
211     Ok(())
212 }
213 
// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
//
// Locate the longest run of consecutive zero pieces and return it as a
// half-open `(start, end)` index pair. When several runs share the maximum
// length the first one wins. A run shorter than two pieces is not worth
// compressing, in which case `(-1, -2)` is returned.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start = 0usize;
    let mut best_len = 0usize;
    let mut cursor = 0usize;

    while cursor < pieces.len() {
        if pieces[cursor] != 0 {
            cursor += 1;
            continue;
        }
        // `cursor` sits on the first zero of a run; advance to just past it.
        let run_start = cursor;
        while cursor < pieces.len() && pieces[cursor] == 0 {
            cursor += 1;
        }
        let run_len = cursor - run_start;
        // Strictly greater, so an earlier run of equal length is kept.
        if run_len > best_len {
            best_start = run_start;
            best_len = run_len;
        }
    }

    // step 3: ignore lone zeroes
    if best_len < 2 {
        (-1, -2)
    } else {
        (best_start as isize, (best_start + best_len) as isize)
    }
}
249 
/// Parse one dot-separated part of an IPv4 address.
///
/// A `0x`/`0X` prefix selects hexadecimal, and a leading `0` on an input of
/// at least two bytes selects octal; everything else is decimal. The prefix is
/// stripped before the digits are examined.
///
/// Returns:
/// * `Ok(Some(n))` for a valid number; empty digits (such as `""` or `"0x"`)
///   count as `0`;
/// * `Ok(None)` if any character is not a digit of the selected radix, which
///   means the part is not a number at all;
/// * `Err(())` if the digits are valid but the value does not fit in a `u32`.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    let radix = if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        16
    } else if input.len() >= 2 && input.starts_with('0') {
        input = &input[1..];
        8
    } else {
        10
    };

    // `from_str_radix` does not say why it failed
    // (https://github.com/rust-lang/rust/issues/22639), so the digits are
    // validated up front. That also rules out a leading sign, leaving
    // overflow as the only way the conversion below can fail.
    if !input.chars().all(|c| c.is_digit(radix)) {
        return Ok(None);
    }
    if input.is_empty() {
        return Ok(Some(0));
    }

    u32::from_str_radix(input, radix).map(Some).map_err(|_| ())
}
289 
290 /// <https://url.spec.whatwg.org/#concept-ipv4-parser>
parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>>291 fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
292     if input.is_empty() {
293         return Ok(None);
294     }
295     let mut parts: Vec<&str> = input.split('.').collect();
296     if parts.last() == Some(&"") {
297         parts.pop();
298     }
299     if parts.len() > 4 {
300         return Ok(None);
301     }
302     let mut numbers: Vec<u32> = Vec::new();
303     let mut overflow = false;
304     for part in parts {
305         if part.is_empty() {
306             return Ok(None);
307         }
308         match parse_ipv4number(part) {
309             Ok(Some(n)) => numbers.push(n),
310             Ok(None) => return Ok(None),
311             Err(()) => overflow = true,
312         };
313     }
314     if overflow {
315         return Err(ParseError::InvalidIpv4Address);
316     }
317     let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
318     // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
319     if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
320         return Err(ParseError::InvalidIpv4Address);
321     }
322     if numbers.iter().any(|x| *x > 255) {
323         return Err(ParseError::InvalidIpv4Address);
324     }
325     for (counter, n) in numbers.iter().enumerate() {
326         ipv4 += n << (8 * (3 - counter as u32))
327     }
328     Ok(Some(Ipv4Addr::from(ipv4)))
329 }
330 
/// Parse the textual form of an IPv6 address (without the surrounding `[` `]`,
/// which `Host::parse` and `Host::parse_opaque` strip before calling this).
///
/// Accepts up to eight `:`-separated groups of one to four hexadecimal digits,
/// at most one `::` compression, and an optional trailing dotted-decimal IPv4
/// address that fills the last two groups.
///
/// Returns the parsed address, or `ParseError::InvalidIpv6Address` for any
/// malformed input.
///
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
    let input = input.as_bytes();
    let len = input.len();
    // Set once a '.' is met, meaning the rest of the input is an embedded IPv4 address.
    let mut is_ip_v4 = false;
    // The eight 16-bit groups of the address being built.
    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
    // Index of the next group in `pieces` to fill.
    let mut piece_pointer = 0;
    // Value of `piece_pointer` right after the `::`, if one has been seen.
    let mut compress_pointer = None;
    // Byte offset into `input`.
    let mut i = 0;

    // Anything shorter than two bytes is rejected ("::" is the shortest accepted input).
    if len < 2 {
        return Err(ParseError::InvalidIpv6Address);
    }

    // A leading ':' is only valid as part of a leading "::".
    if input[0] == b':' {
        if input[1] != b':' {
            return Err(ParseError::InvalidIpv6Address);
        }
        i = 2;
        piece_pointer = 1;
        compress_pointer = Some(1);
    }

    // Main loop: one hexadecimal group (or the `::` marker) per iteration.
    while i < len {
        // More input although all eight groups are already filled.
        if piece_pointer == 8 {
            return Err(ParseError::InvalidIpv6Address);
        }
        // A ':' at the start of a group is the second colon of a "::".
        if input[i] == b':' {
            // Only one "::" is allowed.
            if compress_pointer.is_some() {
                return Err(ParseError::InvalidIpv6Address);
            }
            i += 1;
            piece_pointer += 1;
            compress_pointer = Some(piece_pointer);
            continue;
        }
        // Read up to four hexadecimal digits into `value`.
        let start = i;
        let end = cmp::min(len, start + 4);
        let mut value = 0u16;
        while i < end {
            match (input[i] as char).to_digit(16) {
                Some(digit) => {
                    value = value * 0x10 + digit as u16;
                    i += 1;
                }
                None => break,
            }
        }
        // Look at the byte that ended the group, if any.
        if i < len {
            match input[i] {
                b'.' => {
                    // A '.' must follow at least one digit.
                    if i == start {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    // Rewind so the digits are re-read as decimal by the IPv4 loop below.
                    i = start;
                    // The IPv4 part needs two free groups.
                    if piece_pointer > 6 {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                    is_ip_v4 = true;
                }
                b':' => {
                    // Skip the separator; the address may not end with a single ':'.
                    i += 1;
                    if i == len {
                        return Err(ParseError::InvalidIpv6Address);
                    }
                }
                // Anything else (including a fifth hexadecimal digit) is invalid.
                _ => return Err(ParseError::InvalidIpv6Address),
            }
        }
        if is_ip_v4 {
            break;
        }
        pieces[piece_pointer] = value;
        piece_pointer += 1;
    }

    // Embedded IPv4 address: four decimal numbers separated by '.'.
    if is_ip_v4 {
        if piece_pointer > 6 {
            return Err(ParseError::InvalidIpv6Address);
        }
        let mut numbers_seen = 0;
        while i < len {
            // Every number after the first must be preceded by '.',
            // and there may be no more than four numbers.
            if numbers_seen > 0 {
                if numbers_seen < 4 && (i < len && input[i] == b'.') {
                    i += 1
                } else {
                    return Err(ParseError::InvalidIpv6Address);
                }
            }

            // Read one decimal number in 0..=255; `None` until a digit is seen.
            let mut ipv4_piece = None;
            while i < len {
                let digit = match input[i] {
                    c @ b'0'..=b'9' => c - b'0',
                    _ => break,
                };
                match ipv4_piece {
                    None => ipv4_piece = Some(digit as u16),
                    Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
                    Some(ref mut v) => {
                        *v = *v * 10 + digit as u16;
                        if *v > 255 {
                            return Err(ParseError::InvalidIpv6Address);
                        }
                    }
                }
                i += 1;
            }

            // Two numbers share one 16-bit group: the earlier one becomes the high byte.
            // A number with no digits at all is an error.
            pieces[piece_pointer] = if let Some(v) = ipv4_piece {
                pieces[piece_pointer] * 0x100 + v
            } else {
                return Err(ParseError::InvalidIpv6Address);
            };
            numbers_seen += 1;

            // Move to the next group after the second and the fourth number.
            if numbers_seen == 2 || numbers_seen == 4 {
                piece_pointer += 1;
            }
        }

        if numbers_seen != 4 {
            return Err(ParseError::InvalidIpv6Address);
        }
    }

    // Any unconsumed input is an error.
    if i < len {
        return Err(ParseError::InvalidIpv6Address);
    }

    match compress_pointer {
        Some(compress_pointer) => {
            // Expand the "::": move the groups parsed after it to the end of the
            // array, last one first, leaving the zero groups in the gap.
            let mut swaps = piece_pointer - compress_pointer;
            piece_pointer = 7;
            while swaps > 0 {
                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
                swaps -= 1;
                piece_pointer -= 1;
            }
        }
        _ => {
            // Without compression all eight groups must have been given.
            if piece_pointer != 8 {
                return Err(ParseError::InvalidIpv6Address);
            }
        }
    }
    Ok(Ipv6Addr::new(
        pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
    ))
}
481