1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 #[allow(unused_imports, deprecated)]
10 use std::ascii::AsciiExt;
11 
12 use std::error::Error;
13 use std::fmt::{self, Formatter, Write};
14 use std::str;
15 
16 use Url;
17 use encoding::EncodingOverride;
18 use host::{Host, HostInternal};
19 use percent_encoding::{
20     utf8_percent_encode, percent_encode,
21     SIMPLE_ENCODE_SET, DEFAULT_ENCODE_SET, USERINFO_ENCODE_SET, QUERY_ENCODE_SET,
22     PATH_SEGMENT_ENCODE_SET
23 };
24 
// Declares `SPECIAL_PATH_SEGMENT_ENCODE_SET`: every character of
// `PATH_SEGMENT_ENCODE_SET` plus the backslash.
define_encode_set! {
    // The backslash (\) character is treated as a path separator in special URLs
    // so it needs to be additionally escaped in that case.
    pub SPECIAL_PATH_SEGMENT_ENCODE_SET = [PATH_SEGMENT_ENCODE_SET] | {'\\'}
}
30 
/// Shorthand for a `Result` whose error type is `ParseError`.
pub type ParseResult<T> = Result<T, ParseError>;
32 
// Generates the `ParseError` enum from a list of `Variant => "description",`
// pairs, plus an `Error` impl whose `description()` returns the string that
// was paired with the variant.
macro_rules! simple_enum_error {
    ($($name: ident => $description: expr,)+) => {
        /// Errors that can occur during parsing.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        pub enum ParseError {
            $(
                $name,
            )+
        }

        impl Error for ParseError {
            fn description(&self) -> &str {
                match *self {
                    $(
                        ParseError::$name => $description,
                    )+
                }
            }
        }
    }
}
54 
// The fatal parse errors and the message each one reports through
// `Error::description` (and therefore through `Display`).
simple_enum_error! {
    EmptyHost => "empty host",
    IdnaError => "invalid international domain name",
    InvalidPort => "invalid port number",
    InvalidIpv4Address => "invalid IPv4 address",
    InvalidIpv6Address => "invalid IPv6 address",
    InvalidDomainCharacter => "invalid domain character",
    RelativeUrlWithoutBase => "relative URL without a base",
    RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base",
    SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set",
    Overflow => "URLs more than 4 GB are not supported",
}

// Only compiled when the `heapsize` feature is enabled.
#[cfg(feature = "heapsize")]
known_heap_size!(0, ParseError);
70 
71 impl fmt::Display for ParseError {
fmt(&self, fmt: &mut Formatter) -> fmt::Result72     fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
73         self.description().fmt(fmt)
74     }
75 }
76 
77 impl From<::idna::uts46::Errors> for ParseError {
from(_: ::idna::uts46::Errors) -> ParseError78     fn from(_: ::idna::uts46::Errors) -> ParseError { ParseError::IdnaError }
79 }
80 
// Generates the `SyntaxViolation` enum from a list of
// `Variant => "description",` pairs, plus an inherent `description()` method
// returning the `'static` string that was paired with the variant.
macro_rules! syntax_violation_enum {
    ($($name: ident => $description: expr,)+) => {
        /// Non-fatal syntax violations that can occur during parsing.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        pub enum SyntaxViolation {
            $(
                $name,
            )+
        }

        impl SyntaxViolation {
            pub fn description(&self) -> &'static str {
                match *self {
                    $(
                        SyntaxViolation::$name => $description,
                    )+
                }
            }
        }
    }
}
102 
// The non-fatal violations and the message each one reports through
// `SyntaxViolation::description` (and therefore through `Display`).
syntax_violation_enum! {
    Backslash => "backslash",
    C0SpaceIgnored =>
        "leading or trailing control or space character are ignored in URLs",
    EmbeddedCredentials =>
        "embedding authentication information (username or password) \
         in an URL is not recommended",
    ExpectedDoubleSlash => "expected //",
    ExpectedFileDoubleSlash => "expected // after file:",
    FileWithHostAndWindowsDrive => "file: with host and Windows drive letter",
    NonUrlCodePoint => "non-URL code point",
    NullInFragment => "NULL characters are ignored in URL fragment identifiers",
    PercentDecode => "expected 2 hex digits after %",
    TabOrNewlineIgnored => "tabs or newlines are ignored in URLs",
    UnencodedAtSign => "unencoded @ sign in username or password",
}

// Only compiled when the `heapsize` feature is enabled.
#[cfg(feature = "heapsize")]
known_heap_size!(0, SyntaxViolation);
122 
123 impl fmt::Display for SyntaxViolation {
fmt(&self, fmt: &mut Formatter) -> fmt::Result124     fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
125         self.description().fmt(fmt)
126     }
127 }
128 
/// Coarse classification of a URL scheme, as used by the parser to pick
/// the parsing rules that apply.
#[derive(Copy, Clone)]
pub enum SchemeType {
    /// The `file` scheme.
    File,
    /// One of `http`, `https`, `ws`, `wss`, `ftp` or `gopher`.
    SpecialNotFile,
    /// Any other scheme.
    NotSpecial,
}

impl SchemeType {
    /// Returns `true` for every variant except `NotSpecial`.
    pub fn is_special(&self) -> bool {
        match *self {
            SchemeType::NotSpecial => false,
            SchemeType::File | SchemeType::SpecialNotFile => true,
        }
    }

    /// Returns `true` only for the `File` variant.
    pub fn is_file(&self) -> bool {
        match *self {
            SchemeType::File => true,
            SchemeType::SpecialNotFile | SchemeType::NotSpecial => false,
        }
    }

    /// Classifies a scheme name. The comparison is exact (case-sensitive),
    /// so callers are expected to pass an already lower-cased scheme.
    pub fn from(s: &str) -> Self {
        match s {
            "file" => SchemeType::File,
            "http" | "https" | "ws" | "wss" | "ftp" | "gopher" => SchemeType::SpecialNotFile,
            _ => SchemeType::NotSpecial,
        }
    }
}
153 
/// Returns the default port of a special scheme, or `None` when the scheme
/// has no default port known to this function.
///
/// The comparison is exact (case-sensitive).
pub fn default_port(scheme: &str) -> Option<u16> {
    let port = match scheme {
        "ftp" => 21,
        "gopher" => 70,
        "http" | "ws" => 80,
        "https" | "wss" => 443,
        _ => return None,
    };
    Some(port)
}
163 
/// Cursor over the characters of the string being parsed.
///
/// Cloning is how the parser looks ahead: a clone is advanced and then either
/// kept or thrown away. Iterating an `Input` skips ASCII tab, line feed and
/// carriage return characters.
#[derive(Clone)]
pub struct Input<'i> {
    // Remaining, not yet consumed characters.
    chars: str::Chars<'i>,
}
168 
169 impl<'i> Input<'i> {
new(input: &'i str) -> Self170     pub fn new(input: &'i str) -> Self {
171         Input::with_log(input, ViolationFn::NoOp)
172     }
173 
with_log(original_input: &'i str, vfn: ViolationFn) -> Self174     pub fn with_log(original_input: &'i str, vfn: ViolationFn) -> Self {
175         let input = original_input.trim_matches(c0_control_or_space);
176         if vfn.is_set() {
177             if input.len() < original_input.len() {
178                 vfn.call(SyntaxViolation::C0SpaceIgnored)
179             }
180             if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
181                 vfn.call(SyntaxViolation::TabOrNewlineIgnored)
182             }
183         }
184         Input { chars: input.chars() }
185     }
186 
187     #[inline]
is_empty(&self) -> bool188     pub fn is_empty(&self) -> bool {
189         self.clone().next().is_none()
190     }
191 
192     #[inline]
starts_with<P: Pattern>(&self, p: P) -> bool193     fn starts_with<P: Pattern>(&self, p: P) -> bool {
194         p.split_prefix(&mut self.clone())
195     }
196 
197     #[inline]
split_prefix<P: Pattern>(&self, p: P) -> Option<Self>198     pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
199         let mut remaining = self.clone();
200         if p.split_prefix(&mut remaining) {
201             Some(remaining)
202         } else {
203             None
204         }
205     }
206 
207     #[inline]
split_first(&self) -> (Option<char>, Self)208     fn split_first(&self) -> (Option<char>, Self) {
209         let mut remaining = self.clone();
210         (remaining.next(), remaining)
211     }
212 
213     #[inline]
count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self)214     fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
215         let mut count = 0;
216         let mut remaining = self.clone();
217         loop {
218             let mut input = remaining.clone();
219             if matches!(input.next(), Some(c) if f(c)) {
220                 remaining = input;
221                 count += 1;
222             } else {
223                 return (count, remaining)
224             }
225         }
226     }
227 
228     #[inline]
next_utf8(&mut self) -> Option<(char, &'i str)>229     fn next_utf8(&mut self) -> Option<(char, &'i str)> {
230         loop {
231             let utf8 = self.chars.as_str();
232             match self.chars.next() {
233                 Some(c) => {
234                     if !matches!(c, '\t' | '\n' | '\r') {
235                         return Some((c, &utf8[..c.len_utf8()]))
236                     }
237                 }
238                 None => return None
239             }
240         }
241     }
242 }
243 
/// Something that can be matched against the front of an `Input`.
///
/// Implemented in this file for `char`, `&str` and `FnMut(char) -> bool`.
pub trait Pattern {
    /// Advances `input` past the pattern and returns whether it matched.
    ///
    /// On a failed match `input` may already have been advanced, so callers
    /// that need to keep their position should pass a clone.
    fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool;
}
247 
248 impl Pattern for char {
split_prefix<'i>(self, input: &mut Input<'i>) -> bool249     fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { input.next() == Some(self) }
250 }
251 
252 impl<'a> Pattern for &'a str {
split_prefix<'i>(self, input: &mut Input<'i>) -> bool253     fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool {
254         for c in self.chars() {
255             if input.next() != Some(c) {
256                 return false
257             }
258         }
259         true
260     }
261 }
262 
263 impl<F: FnMut(char) -> bool> Pattern for F {
split_prefix<'i>(self, input: &mut Input<'i>) -> bool264     fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool { input.next().map_or(false, self) }
265 }
266 
267 impl<'i> Iterator for Input<'i> {
268     type Item = char;
next(&mut self) -> Option<char>269     fn next(&mut self) -> Option<char> {
270         self.chars.by_ref().find(|&c| !matches!(c, '\t' | '\n' | '\r'))
271     }
272 }
273 
/// Wrapper for syntax violation callback functions.
#[derive(Copy, Clone)]
pub enum ViolationFn<'a> {
    /// Callback that receives the `SyntaxViolation` value itself.
    NewFn(&'a (Fn(SyntaxViolation) + 'a)),
    /// Callback that receives only the violation's description string.
    OldFn(&'a (Fn(&'static str) + 'a)),
    /// No callback: violations are not reported.
    NoOp
}
281 
282 impl<'a> ViolationFn<'a> {
283     /// Call with a violation.
call(self, v: SyntaxViolation)284     pub fn call(self, v: SyntaxViolation) {
285         match self {
286             ViolationFn::NewFn(f) => f(v),
287             ViolationFn::OldFn(f) => f(v.description()),
288             ViolationFn::NoOp => {}
289         }
290     }
291 
292     /// Call with a violation, if provided test returns true. Avoids
293     /// the test entirely if `NoOp`.
call_if<F>(self, v: SyntaxViolation, test: F) where F: Fn() -> bool294     pub fn call_if<F>(self, v: SyntaxViolation, test: F)
295         where F: Fn() -> bool
296     {
297         match self {
298             ViolationFn::NewFn(f) => if test() { f(v) },
299             ViolationFn::OldFn(f) => if test() { f(v.description()) },
300             ViolationFn::NoOp => {} // avoid test
301         }
302     }
303 
304     /// True if not `NoOp`
is_set(self) -> bool305     pub fn is_set(self) -> bool {
306         match self {
307             ViolationFn::NoOp => false,
308             _ => true
309         }
310     }
311 }
312 
313 impl<'a> fmt::Debug for ViolationFn<'a> {
fmt(&self, f: &mut Formatter) -> fmt::Result314     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
315         match *self {
316             ViolationFn::NewFn(_) => write!(f, "NewFn(Fn(SyntaxViolation))"),
317             ViolationFn::OldFn(_) => write!(f, "OldFn(Fn(&'static str))"),
318             ViolationFn::NoOp     => write!(f, "NoOp")
319         }
320     }
321 }
322 
/// State carried through a single URL parse.
pub struct Parser<'a> {
    /// Buffer the serialized URL is built into; it becomes the
    /// `serialization` of the resulting `Url`.
    pub serialization: String,
    /// Base URL that input without a scheme of its own is resolved against.
    pub base_url: Option<&'a Url>,
    /// Encoding override for the query string.
    // NOTE(review): its use is not visible in this part of the file — confirm.
    pub query_encoding_override: EncodingOverride,
    /// Callback notified of non-fatal syntax violations.
    pub violation_fn: ViolationFn<'a>,
    /// Which kind of caller is driving the parser.
    pub context: Context,
}
330 
/// Identifies the kind of caller driving a `Parser`.
///
/// `Parser::for_setter` uses `Setter`; `Parser::parse_scheme` accepts input
/// that ends before a `:` only in the `Setter` context.
#[derive(PartialEq, Eq, Copy, Clone)]
pub enum Context {
    UrlParser,
    Setter,
    PathSegmentSetter,
}
337 
338 impl<'a> Parser<'a> {
for_setter(serialization: String) -> Parser<'a>339     pub fn for_setter(serialization: String) -> Parser<'a> {
340         Parser {
341             serialization: serialization,
342             base_url: None,
343             query_encoding_override: EncodingOverride::utf8(),
344             violation_fn: ViolationFn::NoOp,
345             context: Context::Setter,
346         }
347     }
348 
349     /// https://url.spec.whatwg.org/#concept-basic-url-parser
parse_url(mut self, input: &str) -> ParseResult<Url>350     pub fn parse_url(mut self, input: &str) -> ParseResult<Url> {
351         let input = Input::with_log(input, self.violation_fn);
352         if let Ok(remaining) = self.parse_scheme(input.clone()) {
353             return self.parse_with_scheme(remaining)
354         }
355 
356         // No-scheme state
357         if let Some(base_url) = self.base_url {
358             if input.starts_with('#') {
359                 self.fragment_only(base_url, input)
360             } else if base_url.cannot_be_a_base() {
361                 Err(ParseError::RelativeUrlWithCannotBeABaseBase)
362             } else {
363                 let scheme_type = SchemeType::from(base_url.scheme());
364                 if scheme_type.is_file() {
365                     self.parse_file(input, Some(base_url))
366                 } else {
367                     self.parse_relative(input, scheme_type, base_url)
368                 }
369             }
370         } else {
371             Err(ParseError::RelativeUrlWithoutBase)
372         }
373     }
374 
parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()>375     pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
376         if input.is_empty() || !input.starts_with(ascii_alpha) {
377             return Err(())
378         }
379         debug_assert!(self.serialization.is_empty());
380         while let Some(c) = input.next() {
381             match c {
382                 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.' => {
383                     self.serialization.push(c.to_ascii_lowercase())
384                 }
385                 ':' => return Ok(input),
386                 _ => {
387                     self.serialization.clear();
388                     return Err(())
389                 }
390             }
391         }
392         // EOF before ':'
393         if self.context == Context::Setter {
394             Ok(input)
395         } else {
396             self.serialization.clear();
397             Err(())
398         }
399     }
400 
parse_with_scheme(mut self, input: Input) -> ParseResult<Url>401     fn parse_with_scheme(mut self, input: Input) -> ParseResult<Url> {
402         use SyntaxViolation::{ExpectedFileDoubleSlash, ExpectedDoubleSlash};
403         let scheme_end = to_u32(self.serialization.len())?;
404         let scheme_type = SchemeType::from(&self.serialization);
405         self.serialization.push(':');
406         match scheme_type {
407             SchemeType::File => {
408                 self.violation_fn.call_if(ExpectedFileDoubleSlash, || !input.starts_with("//"));
409                 let base_file_url = self.base_url.and_then(|base| {
410                     if base.scheme() == "file" { Some(base) } else { None }
411                 });
412                 self.serialization.clear();
413                 self.parse_file(input, base_file_url)
414             }
415             SchemeType::SpecialNotFile => {
416                 // special relative or authority state
417                 let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
418                 if let Some(base_url) = self.base_url {
419                     if slashes_count < 2 &&
420                             base_url.scheme() == &self.serialization[..scheme_end as usize] {
421                         // "Cannot-be-a-base" URLs only happen with "not special" schemes.
422                         debug_assert!(!base_url.cannot_be_a_base());
423                         self.serialization.clear();
424                         return self.parse_relative(input, scheme_type, base_url)
425                     }
426                 }
427                 // special authority slashes state
428                 self.violation_fn.call_if(ExpectedDoubleSlash, || {
429                     input.clone().take_while(|&c| matches!(c, '/' | '\\'))
430                     .collect::<String>() != "//"
431                 });
432                 self.after_double_slash(remaining, scheme_type, scheme_end)
433             }
434             SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end)
435         }
436     }
437 
438     /// Scheme other than file, http, https, ws, ws, ftp, gopher.
parse_non_special(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32) -> ParseResult<Url>439     fn parse_non_special(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32)
440                          -> ParseResult<Url> {
441         // path or authority state (
442         if let Some(input) = input.split_prefix("//") {
443             return self.after_double_slash(input, scheme_type, scheme_end)
444         }
445         // Anarchist URL (no authority)
446         let path_start = to_u32(self.serialization.len())?;
447         let username_end = path_start;
448         let host_start = path_start;
449         let host_end = path_start;
450         let host = HostInternal::None;
451         let port = None;
452         let remaining = if let Some(input) = input.split_prefix('/') {
453             let path_start = self.serialization.len();
454             self.serialization.push('/');
455             self.parse_path(scheme_type, &mut false, path_start, input)
456         } else {
457             self.parse_cannot_be_a_base_path(input)
458         };
459         self.with_query_and_fragment(scheme_end, username_end, host_start,
460                                      host_end, host, port, path_start, remaining)
461     }
462 
    /// Parses a `file:` URL. `self.serialization` must be empty on entry.
    ///
    /// `input` is the text to parse, positioned after any `file:` prefix.
    /// `base_file_url` is the base URL to resolve against, if any.
    ///
    /// The first character of `input` selects what happens:
    ///
    /// * end of input, `?` or `#`: reuse the base (or `file:///` when there is
    ///   none) and replace only the query and/or fragment;
    /// * `/` or `\`: two slashes introduce a host, a single one an absolute
    ///   path;
    /// * anything else: a path relative to the base, or to `file:///`.
    fn parse_file(mut self, input: Input, mut base_file_url: Option<&Url>) -> ParseResult<Url> {
        use SyntaxViolation::Backslash;
        // file state
        debug_assert!(self.serialization.is_empty());
        let (first_char, input_after_first_char) = input.split_first();
        match first_char {
            None => {
                if let Some(base_url) = base_file_url {
                    // Copy everything except the fragment
                    let before_fragment = match base_url.fragment_start {
                        Some(i) => &base_url.serialization[..i as usize],
                        None => &*base_url.serialization,
                    };
                    self.serialization.push_str(before_fragment);
                    Ok(Url {
                        serialization: self.serialization,
                        fragment_start: None,
                        ..*base_url
                    })
                } else {
                    // No base: the result is the bare `file:///` URL.
                    self.serialization.push_str("file:///");
                    let scheme_end = "file".len() as u32;
                    let path_start = "file://".len() as u32;
                    Ok(Url {
                        serialization: self.serialization,
                        scheme_end: scheme_end,
                        username_end: path_start,
                        host_start: path_start,
                        host_end: path_start,
                        host: HostInternal::None,
                        port: None,
                        path_start: path_start,
                        query_start: None,
                        fragment_start: None,
                    })
                }
            },
            Some('?') => {
                if let Some(base_url) = base_file_url {
                    // Copy everything up to the query string
                    let before_query = match (base_url.query_start, base_url.fragment_start) {
                        (None, None) => &*base_url.serialization,
                        (Some(i), _) |
                        (None, Some(i)) => base_url.slice(..i)
                    };
                    self.serialization.push_str(before_query);
                    // `input` (not `input_after_first_char`) is passed on, so
                    // the `?` is still in front of it.
                    let (query_start, fragment_start) =
                        self.parse_query_and_fragment(base_url.scheme_end, input)?;
                    Ok(Url {
                        serialization: self.serialization,
                        query_start: query_start,
                        fragment_start: fragment_start,
                        ..*base_url
                    })
                } else {
                    // No base: `file:///` followed by the parsed query/fragment.
                    self.serialization.push_str("file:///");
                    let scheme_end = "file".len() as u32;
                    let path_start = "file://".len() as u32;
                    let (query_start, fragment_start) =
                        self.parse_query_and_fragment(scheme_end, input)?;
                    Ok(Url {
                        serialization: self.serialization,
                        scheme_end: scheme_end,
                        username_end: path_start,
                        host_start: path_start,
                        host_end: path_start,
                        host: HostInternal::None,
                        port: None,
                        path_start: path_start,
                        query_start: query_start,
                        fragment_start: fragment_start,
                    })
                }
            },
            Some('#') => {
                if let Some(base_url) = base_file_url {
                    self.fragment_only(base_url, input)
                } else {
                    // No base: `file:///#` followed by the parsed fragment.
                    self.serialization.push_str("file:///");
                    let scheme_end = "file".len() as u32;
                    let path_start = "file://".len() as u32;
                    // Offset of the `#` pushed on the next line.
                    let fragment_start = "file:///".len() as u32;
                    self.serialization.push('#');
                    self.parse_fragment(input_after_first_char);
                    Ok(Url {
                        serialization: self.serialization,
                        scheme_end: scheme_end,
                        username_end: path_start,
                        host_start: path_start,
                        host_end: path_start,
                        host: HostInternal::None,
                        port: None,
                        path_start: path_start,
                        query_start: None,
                        fragment_start: Some(fragment_start),
                    })
                }
            }
            Some('/') | Some('\\') => {
                self.violation_fn.call_if(Backslash, || first_char == Some('\\'));
                // file slash state
                let (next_char, input_after_next_char) = input_after_first_char.split_first();
                self.violation_fn.call_if(Backslash, || next_char == Some('\\'));
                if matches!(next_char, Some('/') | Some('\\')) {
                    // file host state
                    self.serialization.push_str("file://");
                    let scheme_end = "file".len() as u32;
                    let host_start = "file://".len() as u32;
                    let (path_start, mut host, remaining) =
                        self.parse_file_host(input_after_next_char)?;
                    let mut host_end = to_u32(self.serialization.len())?;
                    // Handed to the path parser by mutable reference; it is
                    // checked again below once the path has been parsed.
                    let mut has_host = !matches!(host, HostInternal::None);
                    let remaining = if path_start {
                        self.parse_path_start(SchemeType::File, &mut has_host, remaining)
                    } else {
                        let path_start = self.serialization.len();
                        self.serialization.push('/');
                        self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
                    };
                    // For file URLs that have a host and whose path starts
                    // with the windows drive letter we just remove the host.
                    if !has_host {
                        self.serialization.drain(host_start as usize..host_end as usize);
                        host_end = host_start;
                        host = HostInternal::None;
                    }
                    let (query_start, fragment_start) =
                        self.parse_query_and_fragment(scheme_end, remaining)?;
                    Ok(Url {
                        serialization: self.serialization,
                        scheme_end: scheme_end,
                        username_end: host_start,
                        host_start: host_start,
                        host_end: host_end,
                        host: host,
                        port: None,
                        // The path begins right where the host ends.
                        path_start: host_end,
                        query_start: query_start,
                        fragment_start: fragment_start,
                    })
                } else {
                    // A single slash: absolute path with no host.
                    self.serialization.push_str("file:///");
                    let scheme_end = "file".len() as u32;
                    let path_start = "file://".len();
                    if let Some(base_url) = base_file_url {
                        // Keep the base's drive letter, if its first path
                        // segment is one, in front of the new path.
                        let first_segment = base_url.path_segments().unwrap().next().unwrap();
                        // FIXME: *normalized* drive letter
                        if is_windows_drive_letter(first_segment) {
                            self.serialization.push_str(first_segment);
                            self.serialization.push('/');
                        }
                    }
                    let remaining = self.parse_path(
                        SchemeType::File, &mut false, path_start, input_after_first_char);
                    let (query_start, fragment_start) =
                        self.parse_query_and_fragment(scheme_end, remaining)?;
                    let path_start = path_start as u32;
                    Ok(Url {
                        serialization: self.serialization,
                        scheme_end: scheme_end,
                        username_end: path_start,
                        host_start: path_start,
                        host_end: path_start,
                        host: HostInternal::None,
                        port: None,
                        path_start: path_start,
                        query_start: query_start,
                        fragment_start: fragment_start,
                    })
                }
            }
            _ => {
                // Input that starts with a Windows drive letter segment is
                // not resolved against the base.
                if starts_with_windows_drive_letter_segment(&input) {
                    base_file_url = None;
                }
                if let Some(base_url) = base_file_url {
                    // Start from the base without its query and fragment,
                    // drop the last path segment, then parse the input as a
                    // path on top of that.
                    let before_query = match (base_url.query_start, base_url.fragment_start) {
                        (None, None) => &*base_url.serialization,
                        (Some(i), _) |
                        (None, Some(i)) => base_url.slice(..i)
                    };
                    self.serialization.push_str(before_query);
                    self.pop_path(SchemeType::File, base_url.path_start as usize);
                    let remaining = self.parse_path(
                        SchemeType::File, &mut true, base_url.path_start as usize, input);
                    self.with_query_and_fragment(
                        base_url.scheme_end, base_url.username_end, base_url.host_start,
                        base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining)
                } else {
                    // No usable base: parse the input as a path under `file:///`.
                    self.serialization.push_str("file:///");
                    let scheme_end = "file".len() as u32;
                    let path_start = "file://".len();
                    let remaining = self.parse_path(
                        SchemeType::File, &mut false, path_start, input);
                    let (query_start, fragment_start) =
                        self.parse_query_and_fragment(scheme_end, remaining)?;
                    let path_start = path_start as u32;
                    Ok(Url {
                        serialization: self.serialization,
                        scheme_end: scheme_end,
                        username_end: path_start,
                        host_start: path_start,
                        host_end: path_start,
                        host: HostInternal::None,
                        port: None,
                        path_start: path_start,
                        query_start: query_start,
                        fragment_start: fragment_start,
                    })
                }
            }
        }
    }
676 
    /// Resolves `input` against `base_url`. `self.serialization` must be
    /// empty on entry; `scheme_type` is the type of the scheme being used.
    ///
    /// The first character of `input` selects what happens:
    ///
    /// * end of input: the base without its fragment;
    /// * `?`: the base up to its query, followed by the new query/fragment;
    /// * `#`: the base with only its fragment replaced;
    /// * `/` or `\`: with two or more slashes, the base's scheme followed by
    ///   a new authority; with one, the base's authority followed by a new
    ///   absolute path;
    /// * anything else: a path relative to the base's path.
    fn parse_relative(mut self, input: Input, scheme_type: SchemeType, base_url: &Url)
                      -> ParseResult<Url> {
        // relative state
        debug_assert!(self.serialization.is_empty());
        let (first_char, input_after_first_char) = input.split_first();
        match first_char {
            None => {
                // Copy everything except the fragment
                let before_fragment = match base_url.fragment_start {
                    Some(i) => &base_url.serialization[..i as usize],
                    None => &*base_url.serialization,
                };
                self.serialization.push_str(before_fragment);
                Ok(Url {
                    serialization: self.serialization,
                    fragment_start: None,
                    ..*base_url
                })
            },
            Some('?') => {
                // Copy everything up to the query string
                let before_query = match (base_url.query_start, base_url.fragment_start) {
                    (None, None) => &*base_url.serialization,
                    (Some(i), _) |
                    (None, Some(i)) => base_url.slice(..i)
                };
                self.serialization.push_str(before_query);
                // `input` (not `input_after_first_char`) is passed on, so the
                // `?` is still in front of it.
                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(base_url.scheme_end, input)?;
                Ok(Url {
                    serialization: self.serialization,
                    query_start: query_start,
                    fragment_start: fragment_start,
                    ..*base_url
                })
            },
            Some('#') => self.fragment_only(base_url, input),
            Some('/') | Some('\\') => {
                let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
                if slashes_count >= 2 {
                    // Anything other than exactly "//" is reported as a violation.
                    self.violation_fn.call_if(SyntaxViolation::ExpectedDoubleSlash, || {
                        input.clone().take_while(|&c| matches!(c, '/' | '\\'))
                        .collect::<String>() != "//"
                    });
                    // Keep only "scheme:" from the base and parse a new authority.
                    let scheme_end = base_url.scheme_end;
                    debug_assert!(base_url.byte_at(scheme_end) == b':');
                    self.serialization.push_str(base_url.slice(..scheme_end + 1));
                    return self.after_double_slash(remaining, scheme_type, scheme_end)
                }
                // A single slash: keep the base up to and including the `/`
                // that starts its path, then parse the new path after it.
                let path_start = base_url.path_start;
                debug_assert!(base_url.byte_at(path_start) == b'/');
                self.serialization.push_str(base_url.slice(..path_start + 1));
                let remaining = self.parse_path(
                    scheme_type, &mut true, path_start as usize, input_after_first_char);
                self.with_query_and_fragment(
                    base_url.scheme_end, base_url.username_end, base_url.host_start,
                    base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining)
            }
            _ => {
                // Start from the base without its query and fragment, drop
                // the last path segment, then parse the input as a path on
                // top of that.
                let before_query = match (base_url.query_start, base_url.fragment_start) {
                    (None, None) => &*base_url.serialization,
                    (Some(i), _) |
                    (None, Some(i)) => base_url.slice(..i)
                };
                self.serialization.push_str(before_query);
                // FIXME spec says just "remove last entry", not the "pop" algorithm
                self.pop_path(scheme_type, base_url.path_start as usize);
                let remaining = self.parse_path(
                    scheme_type, &mut true, base_url.path_start as usize, input);
                self.with_query_and_fragment(
                    base_url.scheme_end, base_url.username_end, base_url.host_start,
                    base_url.host_end, base_url.host, base_url.port, base_url.path_start, remaining)
            }
        }
    }
752 
after_double_slash(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32) -> ParseResult<Url>753     fn after_double_slash(mut self, input: Input, scheme_type: SchemeType, scheme_end: u32)
754                           -> ParseResult<Url> {
755         self.serialization.push('/');
756         self.serialization.push('/');
757         // authority state
758         let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
759         // host state
760         let host_start = to_u32(self.serialization.len())?;
761         let (host_end, host, port, remaining) =
762             self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
763         // path state
764         let path_start = to_u32(self.serialization.len())?;
765         let remaining = self.parse_path_start(
766             scheme_type, &mut true, remaining);
767         self.with_query_and_fragment(scheme_end, username_end, host_start,
768                                      host_end, host, port, path_start, remaining)
769     }
770 
771     /// Return (username_end, remaining)
parse_userinfo<'i>(&mut self, mut input: Input<'i>, scheme_type: SchemeType) -> ParseResult<(u32, Input<'i>)>772     fn parse_userinfo<'i>(&mut self, mut input: Input<'i>, scheme_type: SchemeType)
773                           -> ParseResult<(u32, Input<'i>)> {
774         let mut last_at = None;
775         let mut remaining = input.clone();
776         let mut char_count = 0;
777         while let Some(c) = remaining.next() {
778             match c {
779                 '@' => {
780                     if last_at.is_some() {
781                         self.violation_fn.call(SyntaxViolation::UnencodedAtSign)
782                     } else {
783                         self.violation_fn.call(SyntaxViolation::EmbeddedCredentials)
784                     }
785                     last_at = Some((char_count, remaining.clone()))
786                 },
787                 '/' | '?' | '#' => break,
788                 '\\' if scheme_type.is_special() => break,
789                 _ => (),
790             }
791             char_count += 1;
792         }
793         let (mut userinfo_char_count, remaining) = match last_at {
794             None => return Ok((to_u32(self.serialization.len())?, input)),
795             Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)),
796             Some(x) => x
797         };
798 
799         let mut username_end = None;
800         let mut has_password = false;
801         let mut has_username = false;
802         while userinfo_char_count > 0 {
803             let (c, utf8_c) = input.next_utf8().unwrap();
804             userinfo_char_count -= 1;
805             if c == ':' && username_end.is_none() {
806                 // Start parsing password
807                 username_end = Some(to_u32(self.serialization.len())?);
808                 // We don't add a colon if the password is empty
809                 if userinfo_char_count > 0 {
810                     self.serialization.push(':');
811                     has_password = true;
812                 }
813             } else {
814                 if !has_password {
815                     has_username = true;
816                 }
817                 self.check_url_code_point(c, &input);
818                 self.serialization.extend(utf8_percent_encode(utf8_c, USERINFO_ENCODE_SET));
819             }
820         }
821         let username_end = match username_end {
822             Some(i) => i,
823             None => to_u32(self.serialization.len())?,
824         };
825         if has_username || has_password {
826             self.serialization.push('@');
827         }
828         Ok((username_end, remaining))
829     }
830 
parse_host_and_port<'i>(&mut self, input: Input<'i>, scheme_end: u32, scheme_type: SchemeType) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)>831     fn parse_host_and_port<'i>(&mut self, input: Input<'i>,
832                                    scheme_end: u32, scheme_type: SchemeType)
833                                    -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
834         let (host, remaining) = Parser::parse_host(input, scheme_type)?;
835         write!(&mut self.serialization, "{}", host).unwrap();
836         let host_end = to_u32(self.serialization.len())?;
837         let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
838             let scheme = || default_port(&self.serialization[..scheme_end as usize]);
839             Parser::parse_port(remaining, scheme, self.context)?
840         } else {
841             (None, remaining)
842         };
843         if let Some(port) = port {
844             write!(&mut self.serialization, ":{}", port).unwrap()
845         }
846         Ok((host_end, host.into(), port, remaining))
847     }
848 
parse_host(mut input: Input, scheme_type: SchemeType) -> ParseResult<(Host<String>, Input)>849     pub fn parse_host(mut input: Input, scheme_type: SchemeType)
850                              -> ParseResult<(Host<String>, Input)> {
851         // Undo the Input abstraction here to avoid allocating in the common case
852         // where the host part of the input does not contain any tab or newline
853         let input_str = input.chars.as_str();
854         let mut inside_square_brackets = false;
855         let mut has_ignored_chars = false;
856         let mut non_ignored_chars = 0;
857         let mut bytes = 0;
858         for c in input_str.chars() {
859             match c {
860                 ':' if !inside_square_brackets => break,
861                 '\\' if scheme_type.is_special() => break,
862                 '/' | '?' | '#' => break,
863                 '\t' | '\n' | '\r' => {
864                     has_ignored_chars = true;
865                 }
866                 '[' => {
867                     inside_square_brackets = true;
868                     non_ignored_chars += 1
869                 }
870                 ']' => {
871                     inside_square_brackets = false;
872                     non_ignored_chars += 1
873                 }
874                 _ => non_ignored_chars += 1
875             }
876             bytes += c.len_utf8();
877         }
878         let replaced: String;
879         let host_str;
880         {
881             let host_input = input.by_ref().take(non_ignored_chars);
882             if has_ignored_chars {
883                 replaced = host_input.collect();
884                 host_str = &*replaced
885             } else {
886                 for _ in host_input {}
887                 host_str = &input_str[..bytes]
888             }
889         }
890         if scheme_type.is_special() && host_str.is_empty() {
891             return Err(ParseError::EmptyHost)
892         }
893         if !scheme_type.is_special() {
894             let host = Host::parse_opaque(host_str)?;
895             return Ok((host, input));
896         }
897         let host = Host::parse(host_str)?;
898         Ok((host, input))
899     }
900 
parse_file_host<'i>(&mut self, input: Input<'i>) -> ParseResult<(bool, HostInternal, Input<'i>)>901     pub fn parse_file_host<'i>(&mut self, input: Input<'i>)
902                                -> ParseResult<(bool, HostInternal, Input<'i>)> {
903         // Undo the Input abstraction here to avoid allocating in the common case
904         // where the host part of the input does not contain any tab or newline
905         let input_str = input.chars.as_str();
906         let mut has_ignored_chars = false;
907         let mut non_ignored_chars = 0;
908         let mut bytes = 0;
909         for c in input_str.chars() {
910             match c {
911                 '/' | '\\' | '?' | '#' => break,
912                 '\t' | '\n' | '\r' => has_ignored_chars = true,
913                 _ => non_ignored_chars += 1,
914             }
915             bytes += c.len_utf8();
916         }
917         let replaced: String;
918         let host_str;
919         let mut remaining = input.clone();
920         {
921             let host_input = remaining.by_ref().take(non_ignored_chars);
922             if has_ignored_chars {
923                 replaced = host_input.collect();
924                 host_str = &*replaced
925             } else {
926                 for _ in host_input {}
927                 host_str = &input_str[..bytes]
928             }
929         }
930         if is_windows_drive_letter(host_str) {
931             return Ok((false, HostInternal::None, input))
932         }
933         let host = if host_str.is_empty() {
934             HostInternal::None
935         } else {
936             match Host::parse(host_str)? {
937                 Host::Domain(ref d) if d == "localhost" => HostInternal::None,
938                 host => {
939                     write!(&mut self.serialization, "{}", host).unwrap();
940                     host.into()
941                 }
942             }
943         };
944         Ok((true, host, remaining))
945     }
946 
parse_port<P>(mut input: Input, default_port: P, context: Context) -> ParseResult<(Option<u16>, Input)> where P: Fn() -> Option<u16>947     pub fn parse_port<P>(mut input: Input, default_port: P,
948                                 context: Context)
949                                 -> ParseResult<(Option<u16>, Input)>
950                                 where P: Fn() -> Option<u16> {
951         let mut port: u32 = 0;
952         let mut has_any_digit = false;
953         while let (Some(c), remaining) = input.split_first() {
954             if let Some(digit) = c.to_digit(10) {
955                 port = port * 10 + digit;
956                 if port > ::std::u16::MAX as u32 {
957                     return Err(ParseError::InvalidPort)
958                 }
959                 has_any_digit = true;
960             } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') {
961                 return Err(ParseError::InvalidPort)
962             } else {
963                 break
964             }
965             input = remaining;
966         }
967         let mut opt_port = Some(port as u16);
968         if !has_any_digit || opt_port == default_port() {
969             opt_port = None;
970         }
971         Ok((opt_port, input))
972     }
973 
parse_path_start<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool, mut input: Input<'i>) -> Input<'i>974     pub fn parse_path_start<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool,
975                             mut input: Input<'i>)
976                             -> Input<'i> {
977         // Path start state
978         match input.split_first() {
979             (Some('/'), remaining) => input = remaining,
980             (Some('\\'), remaining) => if scheme_type.is_special() {
981                 self.violation_fn.call(SyntaxViolation::Backslash);
982                 input = remaining
983             },
984             _ => {}
985         }
986         let path_start = self.serialization.len();
987         self.serialization.push('/');
988         self.parse_path(scheme_type, has_host, path_start, input)
989     }
990 
    /// Parse path segments from `input` and append them to `self.serialization`.
    ///
    /// The serialization must already end with `/` (checked by a debug
    /// assertion); `path_start` is the offset of the path inside it. Each
    /// segment is percent-encoded while it is read and then examined as a
    /// whole, so that `.` and `..` segments (including their `%2e` spellings)
    /// can be resolved in place.
    ///
    /// For `file` URLs, `*has_host` is set to `false` when the whole path after
    /// its leading `/` is a Windows drive letter.
    ///
    /// Returns the input that was not consumed. In the `UrlParser` context,
    /// parsing stops in front of the first `?` or `#`.
    pub fn parse_path<'i>(&mut self, scheme_type: SchemeType, has_host: &mut bool,
                          path_start: usize, mut input: Input<'i>)
                          -> Input<'i> {
        // Relative path state
        debug_assert!(self.serialization.ends_with('/'));
        // One iteration of the outer loop per path segment.
        loop {
            let segment_start = self.serialization.len();
            let mut ends_with_slash = false;
            // Inner loop: consume and encode the characters of one segment.
            loop {
                // Kept so that a '?' or '#' can be "un-read" below.
                let input_before_c = input.clone();
                let (c, utf8_c) = if let Some(x) = input.next_utf8() { x } else { break };
                match c {
                    // Separators end the segment, except in the
                    // PathSegmentSetter context where they fall through to the
                    // encoding arm below.
                    '/' if self.context != Context::PathSegmentSetter => {
                        ends_with_slash = true;
                        break
                    },
                    '\\' if self.context != Context::PathSegmentSetter &&
                            scheme_type.is_special() => {
                        self.violation_fn.call(SyntaxViolation::Backslash);
                        ends_with_slash = true;
                        break
                    },
                    // End of the path: leave the '?' or '#' in the returned input.
                    '?' | '#' if self.context == Context::UrlParser => {
                        input = input_before_c;
                        break
                    },
                    _ => {
                        self.check_url_code_point(c, &input);
                        // The encode set depends on the context and, for the
                        // PathSegmentSetter context, on whether the scheme is
                        // special (in which case '\' is escaped as well).
                        if self.context == Context::PathSegmentSetter {
                            if scheme_type.is_special() {
                                self.serialization.extend(utf8_percent_encode(
                                    utf8_c, SPECIAL_PATH_SEGMENT_ENCODE_SET));
                            } else {
                                self.serialization.extend(utf8_percent_encode(
                                    utf8_c, PATH_SEGMENT_ENCODE_SET));
                            }
                        } else {
                            self.serialization.extend(utf8_percent_encode(
                                utf8_c, DEFAULT_ENCODE_SET));
                        }
                    }
                }
            }
            // Look at the segment that was just written (without any trailing
            // slash, which has not been pushed yet).
            match &self.serialization[segment_start..] {
                // Double-dot segment: drop it together with the previous segment.
                ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e" | ".%2E"  => {
                    debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
                    self.serialization.truncate(segment_start - 1);  // Truncate "/.."
                    self.pop_path(scheme_type, path_start);
                    // Keep the path terminated by a slash after popping.
                    if !self.serialization[path_start..].ends_with('/') {
                        self.serialization.push('/')
                    }
                },
                // Single-dot segment: drop it, keeping the slash in front of it.
                "." | "%2e" | "%2E" => {
                    self.serialization.truncate(segment_start);
                },
                _ => {
                    // For file URLs whose whole path so far (after the leading
                    // slash) is a drive letter, rewrite "C|" as "C:" and tell
                    // the caller there is no host.
                    if scheme_type.is_file() && is_windows_drive_letter(
                        &self.serialization[path_start + 1..]
                    ) {
                        if self.serialization.ends_with('|') {
                            self.serialization.pop();
                            self.serialization.push(':');
                        }
                        if *has_host {
                            self.violation_fn.call(SyntaxViolation::FileWithHostAndWindowsDrive);
                            *has_host = false;  // FIXME account for this in callers
                        }
                    }
                    if ends_with_slash {
                        self.serialization.push('/')
                    }
                }
            }
            // Without a separator this was the last segment.
            if !ends_with_slash {
                break
            }
        }
        input
    }
1070 
1071     /// https://url.spec.whatwg.org/#pop-a-urls-path
pop_path(&mut self, scheme_type: SchemeType, path_start: usize)1072     fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1073         if self.serialization.len() > path_start {
1074             let slash_position = self.serialization[path_start..].rfind('/').unwrap();
1075             // + 1 since rfind returns the position before the slash.
1076             let segment_start = path_start + slash_position + 1;
1077             // Don’t pop a Windows drive letter
1078             // FIXME: *normalized* Windows drive letter
1079             if !(
1080                 scheme_type.is_file() &&
1081                 is_windows_drive_letter(&self.serialization[segment_start..])
1082             ) {
1083                 self.serialization.truncate(segment_start);
1084             }
1085         }
1086 
1087     }
1088 
parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i>1089     pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1090         loop {
1091             let input_before_c = input.clone();
1092             match input.next_utf8() {
1093                 Some(('?', _)) | Some(('#', _)) if self.context == Context::UrlParser => {
1094                     return input_before_c
1095                 }
1096                 Some((c, utf8_c)) => {
1097                     self.check_url_code_point(c, &input);
1098                     self.serialization.extend(utf8_percent_encode(
1099                         utf8_c, SIMPLE_ENCODE_SET));
1100                 }
1101                 None => return input
1102             }
1103         }
1104     }
1105 
with_query_and_fragment(mut self, scheme_end: u32, username_end: u32, host_start: u32, host_end: u32, host: HostInternal, port: Option<u16>, path_start: u32, remaining: Input) -> ParseResult<Url>1106     fn with_query_and_fragment(mut self, scheme_end: u32, username_end: u32,
1107                                host_start: u32, host_end: u32, host: HostInternal,
1108                                port: Option<u16>, path_start: u32, remaining: Input)
1109                                -> ParseResult<Url> {
1110         let (query_start, fragment_start) =
1111             self.parse_query_and_fragment(scheme_end, remaining)?;
1112         Ok(Url {
1113             serialization: self.serialization,
1114             scheme_end: scheme_end,
1115             username_end: username_end,
1116             host_start: host_start,
1117             host_end: host_end,
1118             host: host,
1119             port: port,
1120             path_start: path_start,
1121             query_start: query_start,
1122             fragment_start: fragment_start
1123         })
1124     }
1125 
1126     /// Return (query_start, fragment_start)
parse_query_and_fragment(&mut self, scheme_end: u32, mut input: Input) -> ParseResult<(Option<u32>, Option<u32>)>1127     fn parse_query_and_fragment(&mut self, scheme_end: u32, mut input: Input)
1128                                 -> ParseResult<(Option<u32>, Option<u32>)> {
1129         let mut query_start = None;
1130         match input.next() {
1131             Some('#') => {}
1132             Some('?') => {
1133                 query_start = Some(to_u32(self.serialization.len())?);
1134                 self.serialization.push('?');
1135                 let remaining = self.parse_query(scheme_end, input);
1136                 if let Some(remaining) = remaining {
1137                     input = remaining
1138                 } else {
1139                     return Ok((query_start, None))
1140                 }
1141             }
1142             None => return Ok((None, None)),
1143             _ => panic!("Programming error. parse_query_and_fragment() called without ? or #")
1144         }
1145 
1146         let fragment_start = to_u32(self.serialization.len())?;
1147         self.serialization.push('#');
1148         self.parse_fragment(input);
1149         Ok((query_start, Some(fragment_start)))
1150     }
1151 
parse_query<'i>(&mut self, scheme_end: u32, mut input: Input<'i>) -> Option<Input<'i>>1152     pub fn parse_query<'i>(&mut self, scheme_end: u32, mut input: Input<'i>)
1153                            -> Option<Input<'i>> {
1154         let mut query = String::new();  // FIXME: use a streaming decoder instead
1155         let mut remaining = None;
1156         while let Some(c) = input.next() {
1157             if c == '#' && self.context == Context::UrlParser {
1158                 remaining = Some(input);
1159                 break
1160             } else {
1161                 self.check_url_code_point(c, &input);
1162                 query.push(c);
1163             }
1164         }
1165 
1166         let encoding = match &self.serialization[..scheme_end as usize] {
1167             "http" | "https" | "file" | "ftp" | "gopher" => self.query_encoding_override,
1168             _ => EncodingOverride::utf8(),
1169         };
1170         let query_bytes = encoding.encode(query.into());
1171         self.serialization.extend(percent_encode(&query_bytes, QUERY_ENCODE_SET));
1172         remaining
1173     }
1174 
fragment_only(mut self, base_url: &Url, mut input: Input) -> ParseResult<Url>1175     fn fragment_only(mut self, base_url: &Url, mut input: Input) -> ParseResult<Url> {
1176         let before_fragment = match base_url.fragment_start {
1177             Some(i) => base_url.slice(..i),
1178             None => &*base_url.serialization,
1179         };
1180         debug_assert!(self.serialization.is_empty());
1181         self.serialization.reserve(before_fragment.len() + input.chars.as_str().len());
1182         self.serialization.push_str(before_fragment);
1183         self.serialization.push('#');
1184         let next = input.next();
1185         debug_assert!(next == Some('#'));
1186         self.parse_fragment(input);
1187         Ok(Url {
1188             serialization: self.serialization,
1189             fragment_start: Some(to_u32(before_fragment.len())?),
1190             ..*base_url
1191         })
1192     }
1193 
parse_fragment(&mut self, mut input: Input)1194     pub fn parse_fragment(&mut self, mut input: Input) {
1195         while let Some((c, utf8_c)) = input.next_utf8() {
1196             if c ==  '\0' {
1197                 self.violation_fn.call(SyntaxViolation::NullInFragment)
1198             } else {
1199                 self.check_url_code_point(c, &input);
1200                 self.serialization.extend(utf8_percent_encode(utf8_c,
1201                                                               SIMPLE_ENCODE_SET));
1202             }
1203         }
1204     }
1205 
check_url_code_point(&self, c: char, input: &Input)1206     fn check_url_code_point(&self, c: char, input: &Input) {
1207         let vfn = self.violation_fn;
1208         if vfn.is_set() {
1209             if c == '%' {
1210                 let mut input = input.clone();
1211                 if !matches!((input.next(), input.next()), (Some(a), Some(b))
1212                              if is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) {
1213                     vfn.call(SyntaxViolation::PercentDecode)
1214                 }
1215             } else if !is_url_code_point(c) {
1216                 vfn.call(SyntaxViolation::NonUrlCodePoint)
1217             }
1218         }
1219     }
1220 }
1221 
/// Whether `c` is one of `0`-`9`, `a`-`f` or `A`-`F`.
#[inline]
fn is_ascii_hex_digit(c: char) -> bool {
    // `to_digit` only recognises ASCII digits and letters, so with radix 16
    // it accepts exactly the ASCII hexadecimal digits.
    c.to_digit(16).is_some()
}
1226 
// Whether `c` is a URL code point
// (https://url.spec.whatwg.org/#url-code-points): an ASCII alphanumeric, one
// of the ASCII punctuation characters listed below, or a code point from
// U+00A0 to U+10FFFD that is neither a surrogate nor a noncharacter.
//
// Non URL code points:
// U+0000 to U+0020 (space)
// " # % < > [ \ ] ^ ` { | }
// U+007F to U+009F
// surrogates
// U+FDD0 to U+FDEF
// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
#[inline]
fn is_url_code_point(c: char) -> bool {
    let code_point = c as u32;
    if code_point < 0xA0 {
        // ASCII and the C1 controls: only alphanumerics and the punctuation
        // allowed by the standard qualify.
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || matches!(c,
                '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' |
                '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~');
    }
    // From U+00A0 upwards everything is allowed except the noncharacters.
    // Surrogates need no check since a `char` can never be one. This includes
    // U+E0000 to U+E0FFF, which an explicit range list starting at U+E1000
    // would wrongly reject.
    let in_fdd0_block = code_point >= 0xFDD0 && code_point <= 0xFDEF;
    let last_two_of_plane = (code_point & 0xFFFE) == 0xFFFE;
    !(in_fdd0_block || last_two_of_plane)
}
1252 
/// https://url.spec.whatwg.org/#c0-controls-and-space
///
/// Whether `ch` lies in the range U+0000 to U+0020 (space), inclusive.
#[inline]
fn c0_control_or_space(ch: char) -> bool {
    (ch as u32) <= 0x20
}
1258 
/// https://url.spec.whatwg.org/#ascii-alpha
///
/// Whether `ch` is an ASCII letter, `a`-`z` or `A`-`Z`.
#[inline]
pub fn ascii_alpha(ch: char) -> bool {
    let is_lower = ch >= 'a' && ch <= 'z';
    let is_upper = ch >= 'A' && ch <= 'Z';
    is_lower || is_upper
}
1264 
1265 #[inline]
to_u32(i: usize) -> ParseResult<u32>1266 pub fn to_u32(i: usize) -> ParseResult<u32> {
1267     if i <= ::std::u32::MAX as usize {
1268         Ok(i as u32)
1269     } else {
1270         Err(ParseError::Overflow)
1271     }
1272 }
1273 
1274 /// Wether the scheme is file:, the path has a single segment, and that segment
1275 /// is a Windows drive letter
is_windows_drive_letter(segment: &str) -> bool1276 fn is_windows_drive_letter(segment: &str) -> bool {
1277     segment.len() == 2
1278     && starts_with_windows_drive_letter(segment)
1279 }
1280 
starts_with_windows_drive_letter(s: &str) -> bool1281 fn starts_with_windows_drive_letter(s: &str) -> bool {
1282     ascii_alpha(s.as_bytes()[0] as char)
1283     && matches!(s.as_bytes()[1], b':' | b'|')
1284 }
1285 
starts_with_windows_drive_letter_segment(input: &Input) -> bool1286 fn starts_with_windows_drive_letter_segment(input: &Input) -> bool {
1287     let mut input = input.clone();
1288     matches!((input.next(), input.next(), input.next()), (Some(a), Some(b), Some(c))
1289              if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#'))
1290 }
1291