1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 #[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
10 use std::cmp;
11 use std::fmt::{self, Formatter};
12 use std::io;
13 use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs};
14 use std::vec;
15 use parser::{ParseResult, ParseError};
16 use percent_encoding::{percent_decode, utf8_percent_encode, SIMPLE_ENCODE_SET};
17 use idna;
18 
/// Compact, `Copy` summary of which kind of host a value carries.
///
/// Unlike `Host`, the `Domain` variant stores no string; only the IP address
/// variants keep their payload.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HostInternal {
    /// No host. The `From<Host<S>>` conversion never yields this variant.
    None,
    /// A domain host; the domain text itself is not kept in this value.
    Domain,
    /// An IPv4 address host.
    Ipv4(Ipv4Addr),
    /// An IPv6 address host.
    Ipv6(Ipv6Addr),
}

// With the `heapsize` feature enabled, register `HostInternal` with a known
// heap size of 0.
#[cfg(feature = "heapsize")]
known_heap_size!(0, HostInternal);
29 
#[cfg(feature="serde")]
impl ::serde::Serialize for HostInternal {
    /// Serialize as a nested `Option<Option<IpAddr>>`:
    /// `None` for no host, `Some(None)` for a domain, and
    /// `Some(Some(address))` for an IPv4 or IPv6 address.
    fn serialize<S>(&self, serializer: &mut S) -> Result<(), S::Error> where S: ::serde::Serializer {
        // `derive` is deliberately not used here: it pulls in large
        // dependencies that are slow to build, and needs either Macros 1.1
        // (not stable yet) or a cumbersome build script.
        //
        // Writing a correct `Serializer` call sequence for an enum by hand is
        // tricky, so delegate to existing types that already get it right.
        use std::net::IpAddr;
        let repr: Option<Option<IpAddr>> = match *self {
            HostInternal::None => None,
            HostInternal::Domain => Some(None),
            HostInternal::Ipv4(v4) => Some(Some(IpAddr::V4(v4))),
            HostInternal::Ipv6(v6) => Some(Some(IpAddr::V6(v6))),
        };
        repr.serialize(serializer)
    }
}
48 
#[cfg(feature="serde")]
impl ::serde::Deserialize for HostInternal {
    /// Deserialize from the nested `Option<Option<IpAddr>>` form that the
    /// `Serialize` implementation of this type emits.
    fn deserialize<D>(deserializer: &mut D) -> Result<Self, D::Error> where D: ::serde::Deserializer {
        use std::net::IpAddr;
        let repr: Option<Option<IpAddr>> = ::serde::Deserialize::deserialize(deserializer)?;
        let host = match repr {
            None => HostInternal::None,
            Some(None) => HostInternal::Domain,
            Some(Some(IpAddr::V4(v4))) => HostInternal::Ipv4(v4),
            Some(Some(IpAddr::V6(v6))) => HostInternal::Ipv6(v6),
        };
        Ok(host)
    }
}
61 
62 impl<S> From<Host<S>> for HostInternal {
from(host: Host<S>) -> HostInternal63     fn from(host: Host<S>) -> HostInternal {
64         match host {
65             Host::Domain(_) => HostInternal::Domain,
66             Host::Ipv4(address) => HostInternal::Ipv4(address),
67             Host::Ipv6(address) => HostInternal::Ipv6(address),
68         }
69     }
70 }
71 
/// The host name of a URL.
///
/// The type parameter `S` is the string type used for domains; it defaults
/// to `String`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Host<S=String> {
    /// A DNS domain name, as '.' dot-separated labels.
    ///
    /// For the host of a special URL, non-ASCII labels are encoded in punycode
    /// per IDNA. For non-special URLs they are percent encoded instead; such
    /// hosts are also called opaque hosts.
    Domain(S),

    /// An IPv4 address.
    ///
    /// `Url::host_str` returns the serialization of this address,
    /// as four decimal integers separated by `.` dots.
    Ipv4(Ipv4Addr),

    /// An IPv6 address.
    ///
    /// `Url::host_str` returns the serialization of that address between
    /// `[` and `]` brackets, in the format per [RFC 5952 *A Recommendation
    /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
    /// lowercase hexadecimal with maximal `::` compression.
    Ipv6(Ipv6Addr),
}
93 
#[cfg(feature="serde")]
impl<S: ::serde::Serialize> ::serde::Serialize for Host<S> {
    /// Serialize as a `Result<&S, IpAddr>`: `Ok(domain)` for a domain and
    /// `Err(address)` for an IPv4 or IPv6 address.
    fn serialize<R>(&self, serializer: &mut R) -> Result<(), R::Error> where R: ::serde::Serializer {
        use std::net::IpAddr;
        // `Result` is used purely as a ready-made two-variant container here;
        // `Err` does not signal a failure.
        let repr: Result<&S, IpAddr> = match *self {
            Host::Domain(ref domain) => Ok(domain),
            Host::Ipv4(v4) => Err(IpAddr::V4(v4)),
            Host::Ipv6(v6) => Err(IpAddr::V6(v6)),
        };
        repr.serialize(serializer)
    }
}
105 
#[cfg(feature="serde")]
impl<S: ::serde::Deserialize> ::serde::Deserialize for Host<S> {
    /// Deserialize from the `Result<S, IpAddr>` form that the `Serialize`
    /// implementation of this type emits: `Ok` is a domain, `Err` an address.
    fn deserialize<D>(deserializer: &mut D) -> Result<Self, D::Error> where D: ::serde::Deserializer {
        use std::net::IpAddr;
        let repr: Result<S, IpAddr> = ::serde::Deserialize::deserialize(deserializer)?;
        let host = match repr {
            Ok(domain) => Host::Domain(domain),
            Err(IpAddr::V4(v4)) => Host::Ipv4(v4),
            Err(IpAddr::V6(v6)) => Host::Ipv6(v6),
        };
        Ok(host)
    }
}
117 
#[cfg(feature = "heapsize")]
impl<S: HeapSizeOf> HeapSizeOf for Host<S> {
    /// Heap usage is whatever the domain string reports; the IP address
    /// variants contribute 0.
    fn heap_size_of_children(&self) -> usize {
        match *self {
            Host::Domain(ref domain) => domain.heap_size_of_children(),
            Host::Ipv4(_) | Host::Ipv6(_) => 0,
        }
    }
}
127 
128 impl<'a> Host<&'a str> {
129     /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
to_owned(&self) -> Host<String>130     pub fn to_owned(&self) -> Host<String> {
131         match *self {
132             Host::Domain(domain) => Host::Domain(domain.to_owned()),
133             Host::Ipv4(address) => Host::Ipv4(address),
134             Host::Ipv6(address) => Host::Ipv6(address),
135         }
136     }
137 }
138 
139 impl Host<String> {
140     /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
141     ///
142     /// <https://url.spec.whatwg.org/#host-parsing>
parse(input: &str) -> Result<Self, ParseError>143     pub fn parse(input: &str) -> Result<Self, ParseError> {
144         if input.starts_with('[') {
145             if !input.ends_with(']') {
146                 return Err(ParseError::InvalidIpv6Address)
147             }
148             return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
149         }
150         let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
151         let domain = idna::domain_to_ascii(&domain)?;
152         if domain.find(|c| matches!(c,
153             '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '%' | '/' | ':' | '?' | '@' | '[' | '\\' | ']'
154         )).is_some() {
155             return Err(ParseError::InvalidDomainCharacter)
156         }
157         if let Some(address) = parse_ipv4addr(&domain)? {
158             Ok(Host::Ipv4(address))
159         } else {
160             Ok(Host::Domain(domain.into()))
161         }
162     }
163 
164     // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
parse_opaque(input: &str) -> Result<Self, ParseError>165     pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
166         if input.starts_with('[') {
167             if !input.ends_with(']') {
168                 return Err(ParseError::InvalidIpv6Address)
169             }
170             return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
171         }
172         if input.find(|c| matches!(c,
173             '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '/' | ':' | '?' | '@' | '[' | '\\' | ']'
174         )).is_some() {
175             return Err(ParseError::InvalidDomainCharacter)
176         }
177         let s = utf8_percent_encode(input, SIMPLE_ENCODE_SET).to_string();
178         Ok(Host::Domain(s))
179     }
180 }
181 
182 impl<S: AsRef<str>> fmt::Display for Host<S> {
fmt(&self, f: &mut Formatter) -> fmt::Result183     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
184         match *self {
185             Host::Domain(ref domain) => domain.as_ref().fmt(f),
186             Host::Ipv4(ref addr) => addr.fmt(f),
187             Host::Ipv6(ref addr) => {
188                 f.write_str("[")?;
189                 write_ipv6(addr, f)?;
190                 f.write_str("]")
191             }
192         }
193     }
194 }
195 
196 /// This mostly exists because coherence rules don’t allow us to implement
197 /// `ToSocketAddrs for (Host<S>, u16)`.
198 #[derive(Clone, Debug)]
199 pub struct HostAndPort<S=String> {
200     pub host: Host<S>,
201     pub port: u16,
202 }
203 
204 impl<'a> HostAndPort<&'a str> {
205     /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
to_owned(&self) -> HostAndPort<String>206     pub fn to_owned(&self) -> HostAndPort<String> {
207         HostAndPort {
208             host: self.host.to_owned(),
209             port: self.port
210         }
211     }
212 }
213 
214 impl<S: AsRef<str>> fmt::Display for HostAndPort<S> {
fmt(&self, f: &mut Formatter) -> fmt::Result215     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
216         self.host.fmt(f)?;
217         f.write_str(":")?;
218         self.port.fmt(f)
219     }
220 }
221 
222 
223 impl<S: AsRef<str>> ToSocketAddrs for HostAndPort<S> {
224     type Iter = SocketAddrs;
225 
to_socket_addrs(&self) -> io::Result<Self::Iter>226     fn to_socket_addrs(&self) -> io::Result<Self::Iter> {
227         let port = self.port;
228         match self.host {
229             Host::Domain(ref domain) => Ok(SocketAddrs {
230                 // FIXME: use std::net::lookup_host when it’s stable.
231                 state: SocketAddrsState::Domain((domain.as_ref(), port).to_socket_addrs()?)
232             }),
233             Host::Ipv4(address) => Ok(SocketAddrs {
234                 state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port)))
235             }),
236             Host::Ipv6(address) => Ok(SocketAddrs {
237                 state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new(address, port, 0, 0)))
238             }),
239         }
240     }
241 }
242 
243 /// Socket addresses for an URL.
244 #[derive(Debug)]
245 pub struct SocketAddrs {
246     state: SocketAddrsState
247 }
248 
/// Internal iteration state of `SocketAddrs`.
#[derive(Debug)]
enum SocketAddrsState {
    /// Addresses obtained by resolving a domain, yielded one by one.
    Domain(vec::IntoIter<SocketAddr>),
    /// A single address that has not been yielded yet.
    One(SocketAddr),
    /// Nothing is left to yield.
    Done,
}
255 
256 impl Iterator for SocketAddrs {
257     type Item = SocketAddr;
next(&mut self) -> Option<SocketAddr>258     fn next(&mut self) -> Option<SocketAddr> {
259         match self.state {
260             SocketAddrsState::Domain(ref mut iter) => iter.next(),
261             SocketAddrsState::One(s) => {
262                 self.state = SocketAddrsState::Done;
263                 Some(s)
264             }
265             SocketAddrsState::Done => None
266         }
267     }
268 }
269 
write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result270 fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result {
271     let segments = addr.segments();
272     let (compress_start, compress_end) = longest_zero_sequence(&segments);
273     let mut i = 0;
274     while i < 8 {
275         if i == compress_start {
276             f.write_str(":")?;
277             if i == 0 {
278                 f.write_str(":")?;
279             }
280             if compress_end < 8 {
281                 i = compress_end;
282             } else {
283                 break;
284             }
285         }
286         write!(f, "{:x}", segments[i as usize])?;
287         if i < 7 {
288             f.write_str(":")?;
289         }
290         i += 1;
291     }
292     Ok(())
293 }
294 
/// Find the first longest run of consecutive zero pieces.
///
/// Returns `(start, end)` with `end` exclusive when that run has at least two
/// pieces, and the sentinel `(-1, -2)` otherwise.
///
/// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut best_start: isize = -1;
    let mut best_len: isize = -1;
    let mut run_start: isize = -1;
    // Index 8 is one past the end: it behaves like a non-zero piece so that a
    // run of zeros reaching the end of the address is closed as well.
    for index in 0..9 {
        let is_zero = index < 8 && pieces[index as usize] == 0;
        if is_zero {
            if run_start < 0 {
                run_start = index;
            }
            continue;
        }
        if run_start >= 0 {
            let run_len = index - run_start;
            // Strictly greater: on a tie the earliest run wins.
            if run_len > best_len {
                best_start = run_start;
                best_len = run_len;
            }
        }
        run_start = -1;
    }
    // https://url.spec.whatwg.org/#concept-ipv6-serializer
    // step 3: ignore lone zeroes
    if best_len < 2 {
        (-1, -2)
    } else {
        (best_start, best_start + best_len)
    }
}
330 
/// Parse one dot-separated part of an IPv4 address as a number.
///
/// A `0x` / `0X` prefix selects hexadecimal, a leading `0` followed by at
/// least one more character selects octal, and anything else is decimal.
///
/// Returns:
///
/// * `Ok(Some(n))` when the part is a number that fits in a `u32`
///   (an empty digit string, e.g. `"0x"`, counts as 0),
/// * `Ok(None)` when the part contains a character that is not a digit in
///   the selected radix (so the input is not an IPv4 address at all),
/// * `Err(())` when the part is made of valid digits but overflows a `u32`.
///
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
    let mut r = 10;
    if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        r = 16;
    } else if input.len() >= 2 && input.starts_with('0') {
        input = &input[1..];
        r = 8;
    }

    // At the moment we can't know the reason why from_str_radix fails
    // https://github.com/rust-lang/rust/issues/22639
    // So instead we check that every character is a digit in the chosen radix
    // up front; any failure of from_str_radix below can then only be an
    // overflow. This check also rejects a leading '+', which from_str_radix
    // would otherwise accept.
    if !input.chars().all(|c| c.is_digit(r)) {
        return Ok(None);
    }

    if input.is_empty() {
        return Ok(Some(0));
    }
    u32::from_str_radix(input, r).map(Some).map_err(|_| ())
}
368 
369 /// <https://url.spec.whatwg.org/#concept-ipv4-parser>
parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>>370 fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
371     if input.is_empty() {
372         return Ok(None)
373     }
374     let mut parts: Vec<&str> = input.split('.').collect();
375     if parts.last() == Some(&"") {
376         parts.pop();
377     }
378     if parts.len() > 4 {
379         return Ok(None);
380     }
381     let mut numbers: Vec<u32> = Vec::new();
382     let mut overflow = false;
383     for part in parts {
384         if part == "" {
385             return Ok(None);
386         }
387         match parse_ipv4number(part) {
388             Ok(Some(n)) => numbers.push(n),
389             Ok(None) => return Ok(None),
390             Err(()) => overflow = true
391         };
392     }
393     if overflow {
394         return Err(ParseError::InvalidIpv4Address);
395     }
396     let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
397     // Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
398     if ipv4 > u32::max_value() >> (8 * numbers.len() as u32)  {
399         return Err(ParseError::InvalidIpv4Address);
400     }
401     if numbers.iter().any(|x| *x > 255) {
402         return Err(ParseError::InvalidIpv4Address);
403     }
404     for (counter, n) in numbers.iter().enumerate() {
405         ipv4 += n << (8 * (3 - counter as u32))
406     }
407     Ok(Some(Ipv4Addr::from(ipv4)))
408 }
409 
410 /// <https://url.spec.whatwg.org/#concept-ipv6-parser>
parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr>411 fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
412     let input = input.as_bytes();
413     let len = input.len();
414     let mut is_ip_v4 = false;
415     let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
416     let mut piece_pointer = 0;
417     let mut compress_pointer = None;
418     let mut i = 0;
419 
420     if len < 2 {
421         return Err(ParseError::InvalidIpv6Address)
422     }
423 
424     if input[0] == b':' {
425         if input[1] != b':' {
426             return Err(ParseError::InvalidIpv6Address)
427         }
428         i = 2;
429         piece_pointer = 1;
430         compress_pointer = Some(1);
431     }
432 
433     while i < len {
434         if piece_pointer == 8 {
435             return Err(ParseError::InvalidIpv6Address)
436         }
437         if input[i] == b':' {
438             if compress_pointer.is_some() {
439                 return Err(ParseError::InvalidIpv6Address)
440             }
441             i += 1;
442             piece_pointer += 1;
443             compress_pointer = Some(piece_pointer);
444             continue
445         }
446         let start = i;
447         let end = cmp::min(len, start + 4);
448         let mut value = 0u16;
449         while i < end {
450             match (input[i] as char).to_digit(16) {
451                 Some(digit) => {
452                     value = value * 0x10 + digit as u16;
453                     i += 1;
454                 },
455                 None => break
456             }
457         }
458         if i < len {
459             match input[i] {
460                 b'.' => {
461                     if i == start {
462                         return Err(ParseError::InvalidIpv6Address)
463                     }
464                     i = start;
465                     if piece_pointer > 6 {
466                         return Err(ParseError::InvalidIpv6Address)
467                     }
468                     is_ip_v4 = true;
469                 },
470                 b':' => {
471                     i += 1;
472                     if i == len {
473                         return Err(ParseError::InvalidIpv6Address)
474                     }
475                 },
476                 _ => return Err(ParseError::InvalidIpv6Address)
477             }
478         }
479         if is_ip_v4 {
480             break
481         }
482         pieces[piece_pointer] = value;
483         piece_pointer += 1;
484     }
485 
486     if is_ip_v4 {
487         if piece_pointer > 6 {
488             return Err(ParseError::InvalidIpv6Address)
489         }
490         let mut numbers_seen = 0;
491         while i < len {
492             if numbers_seen > 0 {
493                 if numbers_seen < 4 && (i < len && input[i] == b'.') {
494                     i += 1
495                 } else {
496                     return Err(ParseError::InvalidIpv6Address)
497                 }
498             }
499 
500             let mut ipv4_piece = None;
501             while i < len {
502                 let digit = match input[i] {
503                     c @ b'0' ... b'9' => c - b'0',
504                     _ => break
505                 };
506                 match ipv4_piece {
507                     None => ipv4_piece = Some(digit as u16),
508                     Some(0) => return Err(ParseError::InvalidIpv6Address),  // No leading zero
509                     Some(ref mut v) => {
510                         *v = *v * 10 + digit as u16;
511                         if *v > 255 {
512                             return Err(ParseError::InvalidIpv6Address)
513                         }
514                     }
515                 }
516                 i += 1;
517             }
518 
519             pieces[piece_pointer] = if let Some(v) = ipv4_piece {
520                 pieces[piece_pointer] * 0x100 + v
521             } else {
522                 return Err(ParseError::InvalidIpv6Address)
523             };
524             numbers_seen += 1;
525 
526             if numbers_seen == 2 || numbers_seen == 4 {
527                 piece_pointer += 1;
528             }
529         }
530 
531         if numbers_seen != 4 {
532             return Err(ParseError::InvalidIpv6Address)
533         }
534     }
535 
536     if i < len {
537         return Err(ParseError::InvalidIpv6Address)
538     }
539 
540     match compress_pointer {
541         Some(compress_pointer) => {
542             let mut swaps = piece_pointer - compress_pointer;
543             piece_pointer = 7;
544             while swaps > 0 {
545                 pieces.swap(piece_pointer, compress_pointer + swaps - 1);
546                 swaps -= 1;
547                 piece_pointer -= 1;
548             }
549         }
550         _ => if piece_pointer != 8 {
551             return Err(ParseError::InvalidIpv6Address)
552         }
553     }
554     Ok(Ipv6Addr::new(pieces[0], pieces[1], pieces[2], pieces[3],
555                      pieces[4], pieces[5], pieces[6], pieces[7]))
556 }
557