1 // Copyright 2013-2016 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8
9 use std::error::Error;
10 use std::fmt::{self, Formatter, Write};
11 use std::str;
12
13 use host::{Host, HostInternal};
14 use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};
15 use query_encoding::EncodingOverride;
16 use Url;
17
/// https://url.spec.whatwg.org/#fragment-percent-encode-set
///
/// `CONTROLS` plus space, `"`, `<`, `>` and `` ` ``.
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');

/// https://url.spec.whatwg.org/#path-percent-encode-set
///
/// Everything in `FRAGMENT` plus `#`, `?`, `{` and `}`.
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');

/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
///
/// Everything in `PATH` plus the delimiters listed below.
pub(crate) const USERINFO: &AsciiSet = &PATH
    .add(b'/')
    .add(b':')
    .add(b';')
    .add(b'=')
    .add(b'@')
    .add(b'[')
    .add(b'\\')
    .add(b']')
    .add(b'^')
    .add(b'|');

/// Everything in `PATH` plus `/` and `%`.
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');

// The backslash (\) character is treated as a path separator in special URLs
// so it needs to be additionally escaped in that case.
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');

// https://url.spec.whatwg.org/#query-state
/// `CONTROLS` plus space, `"`, `#`, `<` and `>`.
const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
/// Everything in `QUERY` plus the single quote (`'`).
const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\'');

/// Result type of the parsing routines in this file; the error is always a `ParseError`.
pub type ParseResult<T> = Result<T, ParseError>;
48
// Generates the `ParseError` enum from a `Variant => "message",` list, together
// with an `Error` impl whose `description()` returns the message of the variant.
macro_rules! simple_enum_error {
    ($($name: ident => $description: expr,)+) => {
        /// Errors that can occur during parsing.
        ///
        /// This may be extended in the future so exhaustive matching is
        /// discouraged with an unused variant.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        pub enum ParseError {
            $(
                $name,
            )+
            /// Unused variant that enables non-exhaustive matching.
            #[doc(hidden)]
            __FutureProof,
        }

        impl Error for ParseError {
            /// Returns the static human-readable message of this error.
            ///
            /// # Panics
            ///
            /// Panics when called on the hidden `__FutureProof` variant, which
            /// is never meant to be constructed.
            fn description(&self) -> &str {
                match *self {
                    $(
                        ParseError::$name => $description,
                    )+
                    ParseError::__FutureProof => {
                        unreachable!("Don't abuse the FutureProof!");
                    }
                }
            }
        }
    }
}

simple_enum_error! {
    EmptyHost => "empty host",
    IdnaError => "invalid international domain name",
    InvalidPort => "invalid port number",
    InvalidIpv4Address => "invalid IPv4 address",
    InvalidIpv6Address => "invalid IPv6 address",
    InvalidDomainCharacter => "invalid domain character",
    RelativeUrlWithoutBase => "relative URL without a base",
    RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base",
    // The apostrophe is U+2019 (right single quotation mark); it used to be
    // stored as a mis-decoded three-character sequence.
    SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set",
    Overflow => "URLs more than 4 GB are not supported",
}

/// Displays the same text as `description()`, honoring the formatter's
/// width/alignment options because it delegates to `str`'s `Display` impl.
impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        fmt::Display::fmt(self.description(), f)
    }
}
98
99 impl From<::idna::Errors> for ParseError {
from(_: ::idna::Errors) -> ParseError100 fn from(_: ::idna::Errors) -> ParseError {
101 ParseError::IdnaError
102 }
103 }
104
// Builds the `SyntaxViolation` enum from a `Variant => "message",` list and
// gives it a `description()` accessor returning the message of each variant.
macro_rules! syntax_violation_enum {
    ($($variant: ident => $text: expr,)+) => {
        /// Non-fatal syntax violations that can occur during parsing.
        ///
        /// This may be extended in the future so exhaustive matching is
        /// discouraged with an unused variant.
        #[derive(PartialEq, Eq, Clone, Copy, Debug)]
        pub enum SyntaxViolation {
            $(
                $variant,
            )+
            /// Unused variant enable non-exhaustive matching
            #[doc(hidden)]
            __FutureProof,
        }

        impl SyntaxViolation {
            /// Static human-readable message for this violation.
            ///
            /// Panics on the hidden `__FutureProof` variant.
            pub fn description(&self) -> &'static str {
                match self {
                    $(
                        SyntaxViolation::$variant => $text,
                    )+
                    SyntaxViolation::__FutureProof => {
                        unreachable!("Don't abuse the FutureProof!");
                    }
                }
            }
        }
    }
}

syntax_violation_enum! {
    Backslash => "backslash",
    C0SpaceIgnored =>
        "leading or trailing control or space character are ignored in URLs",
    EmbeddedCredentials =>
        "embedding authentication information (username or password) \
         in an URL is not recommended",
    ExpectedDoubleSlash => "expected //",
    ExpectedFileDoubleSlash => "expected // after file:",
    FileWithHostAndWindowsDrive => "file: with host and Windows drive letter",
    NonUrlCodePoint => "non-URL code point",
    NullInFragment => "NULL characters are ignored in URL fragment identifiers",
    PercentDecode => "expected 2 hex digits after %",
    TabOrNewlineIgnored => "tabs or newlines are ignored in URLs",
    UnencodedAtSign => "unencoded @ sign in username or password",
}

/// Writes the `description()` text, applying the formatter's padding options.
impl fmt::Display for SyntaxViolation {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let message = self.description();
        f.pad(message)
    }
}
158
/// Classification of a URL scheme as used by the parser.
///
/// * `File` – the `file` scheme.
/// * `SpecialNotFile` – `http`, `https`, `ws`, `wss`, `ftp` or `gopher`.
/// * `NotSpecial` – every other scheme.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SchemeType {
    File,
    SpecialNotFile,
    NotSpecial,
}

impl SchemeType {
    /// Returns `true` for every variant except `NotSpecial`.
    pub fn is_special(&self) -> bool {
        !matches!(*self, SchemeType::NotSpecial)
    }

    /// Returns `true` only for the `File` variant.
    pub fn is_file(&self) -> bool {
        matches!(*self, SchemeType::File)
    }

    /// Classifies a scheme name.
    ///
    /// The comparison is exact (case-sensitive), so the caller is expected to
    /// pass an already lower-cased scheme; anything unrecognized is
    /// `NotSpecial`.
    pub fn from(s: &str) -> Self {
        match s {
            "http" | "https" | "ws" | "wss" | "ftp" | "gopher" => SchemeType::SpecialNotFile,
            "file" => SchemeType::File,
            _ => SchemeType::NotSpecial,
        }
    }
}
183
/// Returns the default port of a known scheme, or `None` when the scheme has
/// no default port in this table.
///
/// The lookup is an exact, case-sensitive string match.
pub fn default_port(scheme: &str) -> Option<u16> {
    let port = match scheme {
        "http" | "ws" => 80,
        "https" | "wss" => 443,
        "ftp" => 21,
        "gopher" => 70,
        _ => return None,
    };
    Some(port)
}
193
194 #[derive(Clone)]
195 pub struct Input<'i> {
196 chars: str::Chars<'i>,
197 }
198
199 impl<'i> Input<'i> {
new(input: &'i str) -> Self200 pub fn new(input: &'i str) -> Self {
201 Input::with_log(input, None)
202 }
203
with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self204 pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self {
205 let input = original_input.trim_matches(c0_control_or_space);
206 if let Some(vfn) = vfn {
207 if input.len() < original_input.len() {
208 vfn(SyntaxViolation::C0SpaceIgnored)
209 }
210 if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) {
211 vfn(SyntaxViolation::TabOrNewlineIgnored)
212 }
213 }
214 Input {
215 chars: input.chars(),
216 }
217 }
218
219 #[inline]
is_empty(&self) -> bool220 pub fn is_empty(&self) -> bool {
221 self.clone().next().is_none()
222 }
223
224 #[inline]
starts_with<P: Pattern>(&self, p: P) -> bool225 fn starts_with<P: Pattern>(&self, p: P) -> bool {
226 p.split_prefix(&mut self.clone())
227 }
228
229 #[inline]
split_prefix<P: Pattern>(&self, p: P) -> Option<Self>230 pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
231 let mut remaining = self.clone();
232 if p.split_prefix(&mut remaining) {
233 Some(remaining)
234 } else {
235 None
236 }
237 }
238
239 #[inline]
split_first(&self) -> (Option<char>, Self)240 fn split_first(&self) -> (Option<char>, Self) {
241 let mut remaining = self.clone();
242 (remaining.next(), remaining)
243 }
244
245 #[inline]
count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self)246 fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
247 let mut count = 0;
248 let mut remaining = self.clone();
249 loop {
250 let mut input = remaining.clone();
251 if matches!(input.next(), Some(c) if f(c)) {
252 remaining = input;
253 count += 1;
254 } else {
255 return (count, remaining);
256 }
257 }
258 }
259
260 #[inline]
next_utf8(&mut self) -> Option<(char, &'i str)>261 fn next_utf8(&mut self) -> Option<(char, &'i str)> {
262 loop {
263 let utf8 = self.chars.as_str();
264 match self.chars.next() {
265 Some(c) => {
266 if !matches!(c, '\t' | '\n' | '\r') {
267 return Some((c, &utf8[..c.len_utf8()]));
268 }
269 }
270 None => return None,
271 }
272 }
273 }
274 }
275
276 pub trait Pattern {
split_prefix<'i>(self, input: &mut Input<'i>) -> bool277 fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool;
278 }
279
280 impl Pattern for char {
split_prefix<'i>(self, input: &mut Input<'i>) -> bool281 fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool {
282 input.next() == Some(self)
283 }
284 }
285
286 impl<'a> Pattern for &'a str {
split_prefix<'i>(self, input: &mut Input<'i>) -> bool287 fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool {
288 for c in self.chars() {
289 if input.next() != Some(c) {
290 return false;
291 }
292 }
293 true
294 }
295 }
296
297 impl<F: FnMut(char) -> bool> Pattern for F {
split_prefix<'i>(self, input: &mut Input<'i>) -> bool298 fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool {
299 input.next().map_or(false, self)
300 }
301 }
302
303 impl<'i> Iterator for Input<'i> {
304 type Item = char;
next(&mut self) -> Option<char>305 fn next(&mut self) -> Option<char> {
306 self.chars
307 .by_ref()
308 .find(|&c| !matches!(c, '\t' | '\n' | '\r'))
309 }
310 }
311
/// State carried through one parsing run.
pub struct Parser<'a> {
    /// Output buffer: the parsing methods append the serialized URL to it.
    pub serialization: String,
    /// Base URL used by `parse_url` when the input has no scheme of its own.
    pub base_url: Option<&'a Url>,
    /// Encoding override for the query string (see `EncodingOverride`).
    pub query_encoding_override: EncodingOverride<'a>,
    /// Optional callback invoked for each non-fatal `SyntaxViolation`.
    pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
    /// What the parser is being used for; some rules depend on it.
    pub context: Context,
}
319
/// The situation in which a `Parser` is running.
#[derive(PartialEq, Eq, Copy, Clone)]
pub enum Context {
    /// Parsing a complete URL. In this context `parse_port` rejects any
    /// non-digit character other than `/`, `\`, `?` or `#`.
    UrlParser,
    /// Set by `Parser::for_setter`. In this context `parse_scheme` accepts
    /// input that ends before a `:` is seen.
    Setter,
    // NOTE(review): not referenced in this part of the file; presumably used
    // when a setter appends individual path segments — confirm at its use sites.
    PathSegmentSetter,
}
326
327 impl<'a> Parser<'a> {
log_violation(&self, v: SyntaxViolation)328 fn log_violation(&self, v: SyntaxViolation) {
329 if let Some(f) = self.violation_fn {
330 f(v)
331 }
332 }
333
log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool)334 fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
335 if let Some(f) = self.violation_fn {
336 if test() {
337 f(v)
338 }
339 }
340 }
341
for_setter(serialization: String) -> Parser<'a>342 pub fn for_setter(serialization: String) -> Parser<'a> {
343 Parser {
344 serialization,
345 base_url: None,
346 query_encoding_override: None,
347 violation_fn: None,
348 context: Context::Setter,
349 }
350 }
351
352 /// https://url.spec.whatwg.org/#concept-basic-url-parser
parse_url(mut self, input: &str) -> ParseResult<Url>353 pub fn parse_url(mut self, input: &str) -> ParseResult<Url> {
354 let input = Input::with_log(input, self.violation_fn);
355 if let Ok(remaining) = self.parse_scheme(input.clone()) {
356 return self.parse_with_scheme(remaining);
357 }
358
359 // No-scheme state
360 if let Some(base_url) = self.base_url {
361 if input.starts_with('#') {
362 self.fragment_only(base_url, input)
363 } else if base_url.cannot_be_a_base() {
364 Err(ParseError::RelativeUrlWithCannotBeABaseBase)
365 } else {
366 let scheme_type = SchemeType::from(base_url.scheme());
367 if scheme_type.is_file() {
368 self.parse_file(input, scheme_type, Some(base_url))
369 } else {
370 self.parse_relative(input, scheme_type, base_url)
371 }
372 }
373 } else {
374 Err(ParseError::RelativeUrlWithoutBase)
375 }
376 }
377
parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()>378 pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
379 if input.is_empty() || !input.starts_with(ascii_alpha) {
380 return Err(());
381 }
382 debug_assert!(self.serialization.is_empty());
383 while let Some(c) = input.next() {
384 match c {
385 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' => {
386 self.serialization.push(c.to_ascii_lowercase())
387 }
388 ':' => return Ok(input),
389 _ => {
390 self.serialization.clear();
391 return Err(());
392 }
393 }
394 }
395 // EOF before ':'
396 if self.context == Context::Setter {
397 Ok(input)
398 } else {
399 self.serialization.clear();
400 Err(())
401 }
402 }
403
parse_with_scheme(mut self, input: Input) -> ParseResult<Url>404 fn parse_with_scheme(mut self, input: Input) -> ParseResult<Url> {
405 use SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash};
406 let scheme_end = to_u32(self.serialization.len())?;
407 let scheme_type = SchemeType::from(&self.serialization);
408 self.serialization.push(':');
409 match scheme_type {
410 SchemeType::File => {
411 self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//"));
412 let base_file_url = self.base_url.and_then(|base| {
413 if base.scheme() == "file" {
414 Some(base)
415 } else {
416 None
417 }
418 });
419 self.serialization.clear();
420 self.parse_file(input, scheme_type, base_file_url)
421 }
422 SchemeType::SpecialNotFile => {
423 // special relative or authority state
424 let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
425 if let Some(base_url) = self.base_url {
426 if slashes_count < 2
427 && base_url.scheme() == &self.serialization[..scheme_end as usize]
428 {
429 // "Cannot-be-a-base" URLs only happen with "not special" schemes.
430 debug_assert!(!base_url.cannot_be_a_base());
431 self.serialization.clear();
432 return self.parse_relative(input, scheme_type, base_url);
433 }
434 }
435 // special authority slashes state
436 self.log_violation_if(ExpectedDoubleSlash, || {
437 input
438 .clone()
439 .take_while(|&c| matches!(c, '/' | '\\'))
440 .collect::<String>()
441 != "//"
442 });
443 self.after_double_slash(remaining, scheme_type, scheme_end)
444 }
445 SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end),
446 }
447 }
448
/// Scheme other than file, http, https, ws, wss, ftp, gopher.
///
/// `input` is what follows the `scheme:` prefix already present in
/// `self.serialization`. If it starts with `//` the authority is parsed via
/// `after_double_slash`; otherwise the URL has no authority and the rest is
/// parsed either as a regular path (leading `/`) or as a cannot-be-a-base
/// path.
fn parse_non_special(
    mut self,
    input: Input,
    scheme_type: SchemeType,
    scheme_end: u32,
) -> ParseResult<Url> {
    // path or authority state
    if let Some(input) = input.split_prefix("//") {
        return self.after_double_slash(input, scheme_type, scheme_end);
    }
    // Anarchist URL (no authority)
    // With no authority, every authority-related offset collapses onto the
    // position right after `scheme:`, which is also where the path starts.
    let path_start = to_u32(self.serialization.len())?;
    let username_end = path_start;
    let host_start = path_start;
    let host_end = path_start;
    let host = HostInternal::None;
    let port = None;
    let remaining = if let Some(input) = input.split_prefix('/') {
        // This inner `path_start` is a `usize` shadow used only for `parse_path`.
        let path_start = self.serialization.len();
        self.serialization.push('/');
        self.parse_path(scheme_type, &mut false, path_start, input)
    } else {
        self.parse_cannot_be_a_base_path(input)
    };
    self.with_query_and_fragment(
        scheme_type,
        scheme_end,
        username_end,
        host_start,
        host_end,
        host,
        port,
        path_start,
        remaining,
    )
}
486
/// Parses a `file` URL, or input that is relative to a `file` base URL.
///
/// `self.serialization` must be empty on entry: this method writes the
/// whole serialization itself, either by copying parts of `base_file_url`
/// or by starting from a literal `file://` prefix. `input` is the text after
/// any `file:` prefix. The first character of `input` selects the branch:
/// end of input, `?`, `#`, a slash/backslash, or anything else.
fn parse_file(
    mut self,
    input: Input,
    scheme_type: SchemeType,
    mut base_file_url: Option<&Url>,
) -> ParseResult<Url> {
    use SyntaxViolation::Backslash;
    // file state
    debug_assert!(self.serialization.is_empty());
    let (first_char, input_after_first_char) = input.split_first();
    match first_char {
        // Empty input: the result is the base URL minus its fragment, or the
        // bare `file:///` URL when there is no base.
        None => {
            if let Some(base_url) = base_file_url {
                // Copy everything except the fragment
                let before_fragment = match base_url.fragment_start {
                    Some(i) => &base_url.serialization[..i as usize],
                    None => &*base_url.serialization,
                };
                self.serialization.push_str(before_fragment);
                Ok(Url {
                    serialization: self.serialization,
                    fragment_start: None,
                    ..*base_url
                })
            } else {
                self.serialization.push_str("file:///");
                let scheme_end = "file".len() as u32;
                // The path starts at the third slash of `file:///`.
                let path_start = "file://".len() as u32;
                Ok(Url {
                    serialization: self.serialization,
                    scheme_end,
                    username_end: path_start,
                    host_start: path_start,
                    host_end: path_start,
                    host: HostInternal::None,
                    port: None,
                    path_start,
                    query_start: None,
                    fragment_start: None,
                })
            }
        }
        // Query only: keep the base up to its query, then parse the new
        // query (and fragment) from the input.
        Some('?') => {
            if let Some(base_url) = base_file_url {
                // Copy everything up to the query string
                let before_query = match (base_url.query_start, base_url.fragment_start) {
                    (None, None) => &*base_url.serialization,
                    (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
                };
                self.serialization.push_str(before_query);
                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
                Ok(Url {
                    serialization: self.serialization,
                    query_start,
                    fragment_start,
                    ..*base_url
                })
            } else {
                self.serialization.push_str("file:///");
                let scheme_end = "file".len() as u32;
                let path_start = "file://".len() as u32;
                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(scheme_type, scheme_end, input)?;
                Ok(Url {
                    serialization: self.serialization,
                    scheme_end,
                    username_end: path_start,
                    host_start: path_start,
                    host_end: path_start,
                    host: HostInternal::None,
                    port: None,
                    path_start,
                    query_start,
                    fragment_start,
                })
            }
        }
        // Fragment only.
        Some('#') => {
            if let Some(base_url) = base_file_url {
                self.fragment_only(base_url, input)
            } else {
                self.serialization.push_str("file:///");
                let scheme_end = "file".len() as u32;
                let path_start = "file://".len() as u32;
                // The `#` is pushed right after `file:///`.
                let fragment_start = "file:///".len() as u32;
                self.serialization.push('#');
                self.parse_fragment(input_after_first_char);
                Ok(Url {
                    serialization: self.serialization,
                    scheme_end,
                    username_end: path_start,
                    host_start: path_start,
                    host_end: path_start,
                    host: HostInternal::None,
                    port: None,
                    path_start,
                    query_start: None,
                    fragment_start: Some(fragment_start),
                })
            }
        }
        Some('/') | Some('\\') => {
            self.log_violation_if(Backslash, || first_char == Some('\\'));
            // file slash state
            let (next_char, input_after_next_char) = input_after_first_char.split_first();
            self.log_violation_if(Backslash, || next_char == Some('\\'));
            if matches!(next_char, Some('/') | Some('\\')) {
                // file host state
                self.serialization.push_str("file://");
                let scheme_end = "file".len() as u32;
                let host_start = "file://".len() as u32;
                // Here `path_start` is a flag, not an offset: `parse_file_host`
                // returns `false` when the host text was a Windows drive letter
                // (and then hands back the input unconsumed), `true` otherwise.
                let (path_start, mut host, remaining) =
                    self.parse_file_host(input_after_next_char)?;
                let mut host_end = to_u32(self.serialization.len())?;
                let mut has_host = !matches!(host, HostInternal::None);
                let remaining = if path_start {
                    self.parse_path_start(SchemeType::File, &mut has_host, remaining)
                } else {
                    let path_start = self.serialization.len();
                    self.serialization.push('/');
                    self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
                };
                // For file URLs that have a host and whose path starts
                // with the windows drive letter we just remove the host.
                // NOTE(review): `has_host` is passed by `&mut` to the path
                // parsing above, which presumably clears it in that case —
                // confirm in `parse_path`.
                if !has_host {
                    self.serialization
                        .drain(host_start as usize..host_end as usize);
                    host_end = host_start;
                    host = HostInternal::None;
                }
                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
                Ok(Url {
                    serialization: self.serialization,
                    scheme_end,
                    username_end: host_start,
                    host_start,
                    host_end,
                    host,
                    port: None,
                    path_start: host_end,
                    query_start,
                    fragment_start,
                })
            } else {
                // Single leading slash: an absolute path with no host.
                self.serialization.push_str("file:///");
                let scheme_end = "file".len() as u32;
                let path_start = "file://".len();
                if let Some(base_url) = base_file_url {
                    let first_segment = base_url.path_segments().unwrap().next().unwrap();
                    // FIXME: *normalized* drive letter
                    // A drive letter at the start of the base path is kept as
                    // the first segment of the new path.
                    if is_windows_drive_letter(first_segment) {
                        self.serialization.push_str(first_segment);
                        self.serialization.push('/');
                    }
                }
                let remaining = self.parse_path(
                    SchemeType::File,
                    &mut false,
                    path_start,
                    input_after_first_char,
                );
                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
                let path_start = path_start as u32;
                Ok(Url {
                    serialization: self.serialization,
                    scheme_end,
                    username_end: path_start,
                    host_start: path_start,
                    host_end: path_start,
                    host: HostInternal::None,
                    port: None,
                    path_start,
                    query_start,
                    fragment_start,
                })
            }
        }
        _ => {
            // Input that starts with a Windows drive letter segment is not
            // resolved against the base URL.
            if starts_with_windows_drive_letter_segment(&input) {
                base_file_url = None;
            }
            if let Some(base_url) = base_file_url {
                // Relative path: start from the base (without query/fragment),
                // pop its last path segment, then parse the input onto it.
                let before_query = match (base_url.query_start, base_url.fragment_start) {
                    (None, None) => &*base_url.serialization,
                    (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
                };
                self.serialization.push_str(before_query);
                self.pop_path(SchemeType::File, base_url.path_start as usize);
                let remaining = self.parse_path(
                    SchemeType::File,
                    &mut true,
                    base_url.path_start as usize,
                    input,
                );
                self.with_query_and_fragment(
                    SchemeType::File,
                    base_url.scheme_end,
                    base_url.username_end,
                    base_url.host_start,
                    base_url.host_end,
                    base_url.host,
                    base_url.port,
                    base_url.path_start,
                    remaining,
                )
            } else {
                self.serialization.push_str("file:///");
                let scheme_end = "file".len() as u32;
                let path_start = "file://".len();
                let remaining =
                    self.parse_path(SchemeType::File, &mut false, path_start, input);
                let (query_start, fragment_start) =
                    self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
                let path_start = path_start as u32;
                Ok(Url {
                    serialization: self.serialization,
                    scheme_end,
                    username_end: path_start,
                    host_start: path_start,
                    host_end: path_start,
                    host: HostInternal::None,
                    port: None,
                    path_start,
                    query_start,
                    fragment_start,
                })
            }
        }
    }
}
720
/// Resolves `input` against `base_url` (a non-`file` base).
///
/// `self.serialization` must be empty on entry; it is rebuilt from the
/// relevant prefix of the base URL's serialization followed by whatever is
/// parsed from `input`. The first character of `input` selects the branch:
/// end of input, `?`, `#`, a slash/backslash, or anything else.
fn parse_relative(
    mut self,
    input: Input,
    scheme_type: SchemeType,
    base_url: &Url,
) -> ParseResult<Url> {
    // relative state
    debug_assert!(self.serialization.is_empty());
    let (first_char, input_after_first_char) = input.split_first();
    match first_char {
        None => {
            // Copy everything except the fragment
            let before_fragment = match base_url.fragment_start {
                Some(i) => &base_url.serialization[..i as usize],
                None => &*base_url.serialization,
            };
            self.serialization.push_str(before_fragment);
            Ok(Url {
                serialization: self.serialization,
                fragment_start: None,
                ..*base_url
            })
        }
        Some('?') => {
            // Copy everything up to the query string
            let before_query = match (base_url.query_start, base_url.fragment_start) {
                (None, None) => &*base_url.serialization,
                (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
            };
            self.serialization.push_str(before_query);
            let (query_start, fragment_start) =
                self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
            Ok(Url {
                serialization: self.serialization,
                query_start,
                fragment_start,
                ..*base_url
            })
        }
        Some('#') => self.fragment_only(base_url, input),
        Some('/') | Some('\\') => {
            let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
            if slashes_count >= 2 {
                // Two or more slashes: only the scheme is kept from the base
                // and a new authority is parsed from the input.
                self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || {
                    input
                        .clone()
                        .take_while(|&c| matches!(c, '/' | '\\'))
                        .collect::<String>()
                        != "//"
                });
                let scheme_end = base_url.scheme_end;
                debug_assert!(base_url.byte_at(scheme_end) == b':');
                // `+ 1` so that the copied prefix includes the `:`.
                self.serialization
                    .push_str(base_url.slice(..scheme_end + 1));
                return self.after_double_slash(remaining, scheme_type, scheme_end);
            }
            // Single slash: keep the base up to and including the first `/`
            // of its path, then parse the input as an absolute path.
            let path_start = base_url.path_start;
            debug_assert!(base_url.byte_at(path_start) == b'/');
            self.serialization
                .push_str(base_url.slice(..path_start + 1));
            let remaining = self.parse_path(
                scheme_type,
                &mut true,
                path_start as usize,
                input_after_first_char,
            );
            self.with_query_and_fragment(
                scheme_type,
                base_url.scheme_end,
                base_url.username_end,
                base_url.host_start,
                base_url.host_end,
                base_url.host,
                base_url.port,
                base_url.path_start,
                remaining,
            )
        }
        _ => {
            // Relative path: start from the base without its query/fragment,
            // pop the last path segment, then parse the input onto it.
            let before_query = match (base_url.query_start, base_url.fragment_start) {
                (None, None) => &*base_url.serialization,
                (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
            };
            self.serialization.push_str(before_query);
            // FIXME spec says just "remove last entry", not the "pop" algorithm
            self.pop_path(scheme_type, base_url.path_start as usize);
            let remaining =
                self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input);
            self.with_query_and_fragment(
                scheme_type,
                base_url.scheme_end,
                base_url.username_end,
                base_url.host_start,
                base_url.host_end,
                base_url.host,
                base_url.port,
                base_url.path_start,
                remaining,
            )
        }
    }
}
823
after_double_slash( mut self, input: Input, scheme_type: SchemeType, scheme_end: u32, ) -> ParseResult<Url>824 fn after_double_slash(
825 mut self,
826 input: Input,
827 scheme_type: SchemeType,
828 scheme_end: u32,
829 ) -> ParseResult<Url> {
830 self.serialization.push('/');
831 self.serialization.push('/');
832 // authority state
833 let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?;
834 // host state
835 let host_start = to_u32(self.serialization.len())?;
836 let (host_end, host, port, remaining) =
837 self.parse_host_and_port(remaining, scheme_end, scheme_type)?;
838 // path state
839 let path_start = to_u32(self.serialization.len())?;
840 let remaining = self.parse_path_start(scheme_type, &mut true, remaining);
841 self.with_query_and_fragment(
842 scheme_type,
843 scheme_end,
844 username_end,
845 host_start,
846 host_end,
847 host,
848 port,
849 path_start,
850 remaining,
851 )
852 }
853
/// Return (username_end, remaining)
///
/// Parses an optional `username[:password]@` prefix of the authority and
/// appends it, percent-encoded with the `USERINFO` set, to
/// `self.serialization`.
///
/// `username_end` is the serialization offset at which the username ends.
/// `remaining` is the input after the last `@` of the authority, or the
/// untouched `input` when the authority has no `@`.
fn parse_userinfo<'i>(
    &mut self,
    mut input: Input<'i>,
    scheme_type: SchemeType,
) -> ParseResult<(u32, Input<'i>)> {
    // First pass, on a clone: find the last `@` before the end of the
    // authority, remembering how many characters precede it and the cursor
    // just after it.
    let mut last_at = None;
    let mut remaining = input.clone();
    let mut char_count = 0;
    while let Some(c) = remaining.next() {
        match c {
            '@' => {
                if last_at.is_some() {
                    self.log_violation(SyntaxViolation::UnencodedAtSign)
                } else {
                    self.log_violation(SyntaxViolation::EmbeddedCredentials)
                }
                last_at = Some((char_count, remaining.clone()))
            }
            '/' | '?' | '#' => break,
            '\\' if scheme_type.is_special() => break,
            _ => (),
        }
        char_count += 1;
    }
    let (mut userinfo_char_count, remaining) = match last_at {
        // No `@` at all: there is no user info, nothing is consumed.
        None => return Ok((to_u32(self.serialization.len())?, input)),
        // `@` is the very first character: empty user info, just skip the `@`.
        Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)),
        Some(x) => x,
    };

    // Second pass: consume exactly the characters before the last `@`.
    let mut username_end = None;
    let mut has_password = false;
    let mut has_username = false;
    while userinfo_char_count > 0 {
        // Cannot fail: the first pass counted this many characters.
        let (c, utf8_c) = input.next_utf8().unwrap();
        userinfo_char_count -= 1;
        // Only the first `:` separates username from password; later ones
        // fall into the `else` branch and are percent-encoded.
        if c == ':' && username_end.is_none() {
            // Start parsing password
            username_end = Some(to_u32(self.serialization.len())?);
            // We don't add a colon if the password is empty
            if userinfo_char_count > 0 {
                self.serialization.push(':');
                has_password = true;
            }
        } else {
            if !has_password {
                has_username = true;
            }
            self.check_url_code_point(c, &input);
            self.serialization
                .extend(utf8_percent_encode(utf8_c, USERINFO));
        }
    }
    let username_end = match username_end {
        Some(i) => i,
        None => to_u32(self.serialization.len())?,
    };
    // The `@` separator is only written when some user info was written.
    if has_username || has_password {
        self.serialization.push('@');
    }
    Ok((username_end, remaining))
}
917
parse_host_and_port<'i>( &mut self, input: Input<'i>, scheme_end: u32, scheme_type: SchemeType, ) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)>918 fn parse_host_and_port<'i>(
919 &mut self,
920 input: Input<'i>,
921 scheme_end: u32,
922 scheme_type: SchemeType,
923 ) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
924 let (host, remaining) = Parser::parse_host(input, scheme_type)?;
925 write!(&mut self.serialization, "{}", host).unwrap();
926 let host_end = to_u32(self.serialization.len())?;
927 let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') {
928 let scheme = || default_port(&self.serialization[..scheme_end as usize]);
929 Parser::parse_port(remaining, scheme, self.context)?
930 } else {
931 (None, remaining)
932 };
933 if let Some(port) = port {
934 write!(&mut self.serialization, ":{}", port).unwrap()
935 }
936 Ok((host_end, host.into(), port, remaining))
937 }
938
/// Parses a host from the front of `input` and returns it with the cursor
/// positioned just after the host.
///
/// The host ends at the first `:` outside square brackets, at `/`, `?` or
/// `#`, at `\` for special schemes, or at the end of input.
///
/// # Errors
///
/// * `EmptyHost` when the scheme is special and the host is empty.
/// * Whatever `Host::parse_opaque` (non-special schemes) or `Host::parse`
///   (special schemes) reports.
pub fn parse_host(
    mut input: Input,
    scheme_type: SchemeType,
) -> ParseResult<(Host<String>, Input)> {
    // Undo the Input abstraction here to avoid allocating in the common case
    // where the host part of the input does not contain any tab or newline
    let input_str = input.chars.as_str();
    let mut inside_square_brackets = false;
    let mut has_ignored_chars = false;
    // Number of host characters the `Input` iterator will actually yield.
    let mut non_ignored_chars = 0;
    // Byte length of the host in the raw string, ignored characters included.
    let mut bytes = 0;
    for c in input_str.chars() {
        match c {
            ':' if !inside_square_brackets => break,
            '\\' if scheme_type.is_special() => break,
            '/' | '?' | '#' => break,
            '\t' | '\n' | '\r' => {
                has_ignored_chars = true;
            }
            '[' => {
                inside_square_brackets = true;
                non_ignored_chars += 1
            }
            ']' => {
                inside_square_brackets = false;
                non_ignored_chars += 1
            }
            _ => non_ignored_chars += 1,
        }
        bytes += c.len_utf8();
    }
    let replaced: String;
    let host_str;
    {
        let host_input = input.by_ref().take(non_ignored_chars);
        if has_ignored_chars {
            // Collecting through `Input` drops the tabs and newlines.
            replaced = host_input.collect();
            host_str = &*replaced
        } else {
            // Drain the iterator only to advance `input` past the host; the
            // host text itself is borrowed from the raw string.
            for _ in host_input {}
            host_str = &input_str[..bytes]
        }
    }
    if scheme_type.is_special() && host_str.is_empty() {
        return Err(ParseError::EmptyHost);
    }
    if !scheme_type.is_special() {
        let host = Host::parse_opaque(host_str)?;
        return Ok((host, input));
    }
    let host = Host::parse(host_str)?;
    Ok((host, input))
}
992
/// Parses the host of a `file` URL from the front of `input`.
///
/// Returns `(flag, host, remaining)`:
///
/// * When the host text is a Windows drive letter, nothing is consumed or
///   written: the result is `(false, HostInternal::None, input)` with the
///   original cursor.
/// * Otherwise the flag is `true` and `remaining` points after the host.
///   An empty host and the domain `localhost` both yield
///   `HostInternal::None` without writing anything; any other host is
///   appended to `self.serialization`.
///
/// The host ends at the first `/`, `\`, `?` or `#`, or at the end of input.
pub(crate) fn parse_file_host<'i>(
    &mut self,
    input: Input<'i>,
) -> ParseResult<(bool, HostInternal, Input<'i>)> {
    // Undo the Input abstraction here to avoid allocating in the common case
    // where the host part of the input does not contain any tab or newline
    let input_str = input.chars.as_str();
    let mut has_ignored_chars = false;
    // Number of host characters the `Input` iterator will actually yield.
    let mut non_ignored_chars = 0;
    // Byte length of the host in the raw string, ignored characters included.
    let mut bytes = 0;
    for c in input_str.chars() {
        match c {
            '/' | '\\' | '?' | '#' => break,
            '\t' | '\n' | '\r' => has_ignored_chars = true,
            _ => non_ignored_chars += 1,
        }
        bytes += c.len_utf8();
    }
    let replaced: String;
    let host_str;
    // Work on a clone so the original `input` can still be returned untouched.
    let mut remaining = input.clone();
    {
        let host_input = remaining.by_ref().take(non_ignored_chars);
        if has_ignored_chars {
            // Collecting through `Input` drops the tabs and newlines.
            replaced = host_input.collect();
            host_str = &*replaced
        } else {
            // Drain the iterator only to advance `remaining` past the host.
            for _ in host_input {}
            host_str = &input_str[..bytes]
        }
    }
    if is_windows_drive_letter(host_str) {
        return Ok((false, HostInternal::None, input));
    }
    let host = if host_str.is_empty() {
        HostInternal::None
    } else {
        match Host::parse(host_str)? {
            Host::Domain(ref d) if d == "localhost" => HostInternal::None,
            host => {
                write!(&mut self.serialization, "{}", host).unwrap();
                host.into()
            }
        }
    };
    Ok((true, host, remaining))
}
1040
parse_port<P>( mut input: Input, default_port: P, context: Context, ) -> ParseResult<(Option<u16>, Input)> where P: Fn() -> Option<u16>,1041 pub fn parse_port<P>(
1042 mut input: Input,
1043 default_port: P,
1044 context: Context,
1045 ) -> ParseResult<(Option<u16>, Input)>
1046 where
1047 P: Fn() -> Option<u16>,
1048 {
1049 let mut port: u32 = 0;
1050 let mut has_any_digit = false;
1051 while let (Some(c), remaining) = input.split_first() {
1052 if let Some(digit) = c.to_digit(10) {
1053 port = port * 10 + digit;
1054 if port > ::std::u16::MAX as u32 {
1055 return Err(ParseError::InvalidPort);
1056 }
1057 has_any_digit = true;
1058 } else if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') {
1059 return Err(ParseError::InvalidPort);
1060 } else {
1061 break;
1062 }
1063 input = remaining;
1064 }
1065 let mut opt_port = Some(port as u16);
1066 if !has_any_digit || opt_port == default_port() {
1067 opt_port = None;
1068 }
1069 Ok((opt_port, input))
1070 }
1071
parse_path_start<'i>( &mut self, scheme_type: SchemeType, has_host: &mut bool, mut input: Input<'i>, ) -> Input<'i>1072 pub fn parse_path_start<'i>(
1073 &mut self,
1074 scheme_type: SchemeType,
1075 has_host: &mut bool,
1076 mut input: Input<'i>,
1077 ) -> Input<'i> {
1078 // Path start state
1079 match input.split_first() {
1080 (Some('/'), remaining) => input = remaining,
1081 (Some('\\'), remaining) => {
1082 if scheme_type.is_special() {
1083 self.log_violation(SyntaxViolation::Backslash);
1084 input = remaining
1085 }
1086 }
1087 _ => {}
1088 }
1089 let path_start = self.serialization.len();
1090 self.serialization.push('/');
1091 self.parse_path(scheme_type, has_host, path_start, input)
1092 }
1093
/// Parses path segments from `input`, appending the percent-encoded,
/// dot-segment-normalized result to `self.serialization`.
///
/// * `path_start` is the byte index in `self.serialization` where the path
///   begins; the serialization must already end with `/` (debug-asserted).
/// * `has_host` is cleared (with a `FileWithHostAndWindowsDrive` violation)
///   when a `file:` path turns out to be a Windows drive letter.
///
/// Returns the input that was not consumed: in `Context::UrlParser` parsing
/// stops in front of the first `?` or `#`; otherwise it runs to the end.
pub fn parse_path<'i>(
    &mut self,
    scheme_type: SchemeType,
    has_host: &mut bool,
    path_start: usize,
    mut input: Input<'i>,
) -> Input<'i> {
    // Relative path state
    debug_assert!(self.serialization.ends_with('/'));
    // Outer loop: one iteration per path segment.
    loop {
        let segment_start = self.serialization.len();
        let mut ends_with_slash = false;
        // Inner loop: copy the characters of one segment into the serialization.
        loop {
            // Snapshot so that a terminating `?`/`#` can be handed back to the caller.
            let input_before_c = input.clone();
            let (c, utf8_c) = if let Some(x) = input.next_utf8() {
                x
            } else {
                break;
            };
            match c {
                // In the path-segment setter, separators are data and get escaped below.
                '/' if self.context != Context::PathSegmentSetter => {
                    ends_with_slash = true;
                    break;
                }
                // A backslash acts as a separator only for special schemes.
                '\\' if self.context != Context::PathSegmentSetter
                    && scheme_type.is_special() =>
                {
                    self.log_violation(SyntaxViolation::Backslash);
                    ends_with_slash = true;
                    break;
                }
                // End of the path: rewind so the caller sees the `?` or `#`.
                '?' | '#' if self.context == Context::UrlParser => {
                    input = input_before_c;
                    break;
                }
                _ => {
                    self.check_url_code_point(c, &input);
                    // The encode set depends on the context: the segment setter
                    // additionally escapes `/` and `%` (and `\` for special schemes).
                    if self.context == Context::PathSegmentSetter {
                        if scheme_type.is_special() {
                            self.serialization
                                .extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT));
                        } else {
                            self.serialization
                                .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT));
                        }
                    } else {
                        self.serialization.extend(utf8_percent_encode(utf8_c, PATH));
                    }
                }
            }
        }
        // Post-process the segment that was just written.
        match &self.serialization[segment_start..] {
            // Double-dot segment, literal or percent-encoded: drop it and the
            // segment before it.
            ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
            | ".%2E" => {
                debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
                self.serialization.truncate(segment_start - 1); // Truncate "/.."
                self.pop_path(scheme_type, path_start);
                // Make sure the path still ends with a separator after popping.
                if !self.serialization[path_start..].ends_with('/') {
                    self.serialization.push('/')
                }
            }
            // Single-dot segment: drop it, keeping the preceding `/`.
            "." | "%2e" | "%2E" => {
                self.serialization.truncate(segment_start);
            }
            _ => {
                // `file:` path consisting solely of a drive letter: normalize
                // `C|` to `C:` and discard any host.
                if scheme_type.is_file()
                    && is_windows_drive_letter(&self.serialization[path_start + 1..])
                {
                    if self.serialization.ends_with('|') {
                        self.serialization.pop();
                        self.serialization.push(':');
                    }
                    if *has_host {
                        self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive);
                        *has_host = false; // FIXME account for this in callers
                    }
                }
                if ends_with_slash {
                    self.serialization.push('/')
                }
            }
        }
        // No trailing separator means the input or the path ended in this segment.
        if !ends_with_slash {
            break;
        }
    }
    input
}
1182
1183 /// https://url.spec.whatwg.org/#pop-a-urls-path
pop_path(&mut self, scheme_type: SchemeType, path_start: usize)1184 fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
1185 if self.serialization.len() > path_start {
1186 let slash_position = self.serialization[path_start..].rfind('/').unwrap();
1187 // + 1 since rfind returns the position before the slash.
1188 let segment_start = path_start + slash_position + 1;
1189 // Don’t pop a Windows drive letter
1190 // FIXME: *normalized* Windows drive letter
1191 if !(scheme_type.is_file()
1192 && is_windows_drive_letter(&self.serialization[segment_start..]))
1193 {
1194 self.serialization.truncate(segment_start);
1195 }
1196 }
1197 }
1198
parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i>1199 pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
1200 loop {
1201 let input_before_c = input.clone();
1202 match input.next_utf8() {
1203 Some(('?', _)) | Some(('#', _)) if self.context == Context::UrlParser => {
1204 return input_before_c
1205 }
1206 Some((c, utf8_c)) => {
1207 self.check_url_code_point(c, &input);
1208 self.serialization
1209 .extend(utf8_percent_encode(utf8_c, CONTROLS));
1210 }
1211 None => return input,
1212 }
1213 }
1214 }
1215
with_query_and_fragment( mut self, scheme_type: SchemeType, scheme_end: u32, username_end: u32, host_start: u32, host_end: u32, host: HostInternal, port: Option<u16>, path_start: u32, remaining: Input, ) -> ParseResult<Url>1216 fn with_query_and_fragment(
1217 mut self,
1218 scheme_type: SchemeType,
1219 scheme_end: u32,
1220 username_end: u32,
1221 host_start: u32,
1222 host_end: u32,
1223 host: HostInternal,
1224 port: Option<u16>,
1225 path_start: u32,
1226 remaining: Input,
1227 ) -> ParseResult<Url> {
1228 let (query_start, fragment_start) =
1229 self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
1230 Ok(Url {
1231 serialization: self.serialization,
1232 scheme_end,
1233 username_end,
1234 host_start,
1235 host_end,
1236 host,
1237 port,
1238 path_start,
1239 query_start,
1240 fragment_start,
1241 })
1242 }
1243
1244 /// Return (query_start, fragment_start)
parse_query_and_fragment( &mut self, scheme_type: SchemeType, scheme_end: u32, mut input: Input, ) -> ParseResult<(Option<u32>, Option<u32>)>1245 fn parse_query_and_fragment(
1246 &mut self,
1247 scheme_type: SchemeType,
1248 scheme_end: u32,
1249 mut input: Input,
1250 ) -> ParseResult<(Option<u32>, Option<u32>)> {
1251 let mut query_start = None;
1252 match input.next() {
1253 Some('#') => {}
1254 Some('?') => {
1255 query_start = Some(to_u32(self.serialization.len())?);
1256 self.serialization.push('?');
1257 let remaining = self.parse_query(scheme_type, scheme_end, input);
1258 if let Some(remaining) = remaining {
1259 input = remaining
1260 } else {
1261 return Ok((query_start, None));
1262 }
1263 }
1264 None => return Ok((None, None)),
1265 _ => panic!("Programming error. parse_query_and_fragment() called without ? or #"),
1266 }
1267
1268 let fragment_start = to_u32(self.serialization.len())?;
1269 self.serialization.push('#');
1270 self.parse_fragment(input);
1271 Ok((query_start, Some(fragment_start)))
1272 }
1273
parse_query<'i>( &mut self, scheme_type: SchemeType, scheme_end: u32, mut input: Input<'i>, ) -> Option<Input<'i>>1274 pub fn parse_query<'i>(
1275 &mut self,
1276 scheme_type: SchemeType,
1277 scheme_end: u32,
1278 mut input: Input<'i>,
1279 ) -> Option<Input<'i>> {
1280 let mut query = String::new(); // FIXME: use a streaming decoder instead
1281 let mut remaining = None;
1282 while let Some(c) = input.next() {
1283 if c == '#' && self.context == Context::UrlParser {
1284 remaining = Some(input);
1285 break;
1286 } else {
1287 self.check_url_code_point(c, &input);
1288 query.push(c);
1289 }
1290 }
1291
1292 let encoding = match &self.serialization[..scheme_end as usize] {
1293 "http" | "https" | "file" | "ftp" | "gopher" => self.query_encoding_override,
1294 _ => None,
1295 };
1296 let query_bytes = ::query_encoding::encode(encoding, &query);
1297 let set = if scheme_type.is_special() {
1298 SPECIAL_QUERY
1299 } else {
1300 QUERY
1301 };
1302 self.serialization.extend(percent_encode(&query_bytes, set));
1303 remaining
1304 }
1305
fragment_only(mut self, base_url: &Url, mut input: Input) -> ParseResult<Url>1306 fn fragment_only(mut self, base_url: &Url, mut input: Input) -> ParseResult<Url> {
1307 let before_fragment = match base_url.fragment_start {
1308 Some(i) => base_url.slice(..i),
1309 None => &*base_url.serialization,
1310 };
1311 debug_assert!(self.serialization.is_empty());
1312 self.serialization
1313 .reserve(before_fragment.len() + input.chars.as_str().len());
1314 self.serialization.push_str(before_fragment);
1315 self.serialization.push('#');
1316 let next = input.next();
1317 debug_assert!(next == Some('#'));
1318 self.parse_fragment(input);
1319 Ok(Url {
1320 serialization: self.serialization,
1321 fragment_start: Some(to_u32(before_fragment.len())?),
1322 ..*base_url
1323 })
1324 }
1325
parse_fragment(&mut self, mut input: Input)1326 pub fn parse_fragment(&mut self, mut input: Input) {
1327 while let Some((c, utf8_c)) = input.next_utf8() {
1328 if c == '\0' {
1329 self.log_violation(SyntaxViolation::NullInFragment)
1330 } else {
1331 self.check_url_code_point(c, &input);
1332 self.serialization.extend(utf8_percent_encode(
1333 utf8_c,
1334 // FIXME: tests fail when we use the FRAGMENT set here
1335 // as defined in the spec as of 2019-07-17,
1336 // likely because tests are out of date.
1337 // See https://github.com/servo/rust-url/issues/290
1338 CONTROLS,
1339 ));
1340 }
1341 }
1342 }
1343
check_url_code_point(&self, c: char, input: &Input)1344 fn check_url_code_point(&self, c: char, input: &Input) {
1345 if let Some(vfn) = self.violation_fn {
1346 if c == '%' {
1347 let mut input = input.clone();
1348 if !matches!((input.next(), input.next()), (Some(a), Some(b))
1349 if is_ascii_hex_digit(a) && is_ascii_hex_digit(b))
1350 {
1351 vfn(SyntaxViolation::PercentDecode)
1352 }
1353 } else if !is_url_code_point(c) {
1354 vfn(SyntaxViolation::NonUrlCodePoint)
1355 }
1356 }
1357 }
1358 }
1359
/// Returns `true` for `0-9`, `a-f` and `A-F`.
#[inline]
fn is_ascii_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
1364
/// Whether `c` is a URL code point as defined by
/// https://url.spec.whatwg.org/#url-code-points
///
/// URL code points are the ASCII alphanumerics, a fixed set of ASCII
/// punctuation, and every code point from U+00A0 to U+10FFFD that is not a
/// surrogate or a noncharacter.
// Non URL code points:
// U+0000 to U+0020 (space)
// " # % < > [ \ ] ^ ` { | }
// U+007F to U+009F
// surrogates
// U+FDD0 to U+FDEF
// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
#[inline]
fn is_url_code_point(c: char) -> bool {
    matches!(c,
        'a'..='z' |
        'A'..='Z' |
        '0'..='9' |
        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' |
        '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' |
        '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' |
        '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' |
        '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' |
        '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' |
        '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' |
        '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' |
        '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' |
        // Plane 14 starts at U+E0000 like every other plane; only its last
        // two code points are noncharacters.
        '\u{D0000}'..='\u{DFFFD}' | '\u{E0000}'..='\u{EFFFD}' |
        '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
}
1390
/// https://url.spec.whatwg.org/#c0-controls-and-space
///
/// Returns `true` for every code point from U+0000 up to and including
/// U+0020 (space).
#[inline]
fn c0_control_or_space(ch: char) -> bool {
    matches!(ch, '\u{0}'..='\u{20}')
}
1396
/// https://url.spec.whatwg.org/#ascii-alpha
///
/// Returns `true` for `a-z` and `A-Z` only.
#[inline]
pub fn ascii_alpha(ch: char) -> bool {
    ch.is_ascii_alphabetic()
}
1402
1403 #[inline]
to_u32(i: usize) -> ParseResult<u32>1404 pub fn to_u32(i: usize) -> ParseResult<u32> {
1405 if i <= ::std::u32::MAX as usize {
1406 Ok(i as u32)
1407 } else {
1408 Err(ParseError::Overflow)
1409 }
1410 }
1411
1412 /// Wether the scheme is file:, the path has a single segment, and that segment
1413 /// is a Windows drive letter
is_windows_drive_letter(segment: &str) -> bool1414 fn is_windows_drive_letter(segment: &str) -> bool {
1415 segment.len() == 2 && starts_with_windows_drive_letter(segment)
1416 }
1417
/// Whether `s` begins with a Windows drive letter: an ASCII letter followed
/// by `:` or `|`.
///
/// Returns `false` (rather than panicking on an out-of-bounds index) when `s`
/// is shorter than two bytes.
fn starts_with_windows_drive_letter(s: &str) -> bool {
    let bytes = s.as_bytes();
    // `is_ascii_alphabetic` accepts exactly `a-z` and `A-Z`, i.e. the URL
    // spec's "ASCII alpha".
    bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && matches!(bytes[1], b':' | b'|')
}
1421
starts_with_windows_drive_letter_segment(input: &Input) -> bool1422 fn starts_with_windows_drive_letter_segment(input: &Input) -> bool {
1423 let mut input = input.clone();
1424 matches!((input.next(), input.next(), input.next()), (Some(a), Some(b), Some(c))
1425 if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#'))
1426 }
1427