1 // Copyright 2013-2015 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 /*!
10 
11 rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12 for the [Rust](http://rust-lang.org/) programming language.
13 
14 
15 # URL parsing and data structures
16 
17 First, URL parsing may fail for various reasons and therefore returns a `Result`.
18 
19 ```
20 use url::{Url, ParseError};
21 
22 assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23 ```
24 
25 Let’s parse a valid URL and look at its components.
26 
27 ```
28 use url::{Url, Host, Position};
29 # use url::ParseError;
30 # fn run() -> Result<(), ParseError> {
31 let issue_list_url = Url::parse(
32     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33 )?;
34 
35 
36 assert!(issue_list_url.scheme() == "https");
37 assert!(issue_list_url.username() == "");
38 assert!(issue_list_url.password() == None);
39 assert!(issue_list_url.host_str() == Some("github.com"));
40 assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41 assert!(issue_list_url.port() == None);
42 assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43 assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44         Some(vec!["rust-lang", "rust", "issues"]));
45 assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46 assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47 assert!(issue_list_url.fragment() == None);
48 assert!(!issue_list_url.cannot_be_a_base());
49 # Ok(())
50 # }
51 # run().unwrap();
52 ```
53 
54 Some URLs are said to be *cannot-be-a-base*:
55 they don’t have a username, password, host, or port,
56 and their "path" is an arbitrary string rather than slash-separated segments:
57 
58 ```
59 use url::Url;
60 # use url::ParseError;
61 
62 # fn run() -> Result<(), ParseError> {
63 let data_url = Url::parse("data:text/plain,Hello?World#")?;
64 
65 assert!(data_url.cannot_be_a_base());
66 assert!(data_url.scheme() == "data");
67 assert!(data_url.path() == "text/plain,Hello");
68 assert!(data_url.path_segments().is_none());
69 assert!(data_url.query() == Some("World"));
70 assert!(data_url.fragment() == Some(""));
71 # Ok(())
72 # }
73 # run().unwrap();
74 ```
75 
76 ## Serde
77 
78 Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
79 
80 # Base URL
81 
82 Many contexts allow URL *references* that can be relative to a *base URL*:
83 
84 ```html
85 <link rel="stylesheet" href="../main.css">
86 ```
87 
88 Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
89 
90 ```
91 use url::{Url, ParseError};
92 
93 assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
94 ```
95 
96 Use the `join` method on an `Url` to use it as a base URL:
97 
98 ```
99 use url::Url;
100 # use url::ParseError;
101 
102 # fn run() -> Result<(), ParseError> {
103 let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
104 let css_url = this_document.join("../main.css")?;
105 assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
106 # Ok(())
107 # }
108 # run().unwrap();
109 ```
110 
111 # Feature: `serde`
112 
113 If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
114 [`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
115 [`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
116 See [serde documentation](https://serde.rs) for more information.
117 
118 ```toml
119 url = { version = "2", features = ["serde"] }
120 ```
121 */
122 
123 #![doc(html_root_url = "https://docs.rs/url/2.2.1")]
124 
125 #[macro_use]
126 extern crate matches;
127 pub use form_urlencoded;
128 
129 #[cfg(feature = "serde")]
130 extern crate serde;
131 
132 use crate::host::HostInternal;
133 use crate::parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
134 use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
135 use std::borrow::Borrow;
136 use std::cmp;
137 use std::fmt::{self, Write};
138 use std::hash;
139 use std::io;
140 use std::mem;
141 use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
142 use std::ops::{Range, RangeFrom, RangeTo};
143 use std::path::{Path, PathBuf};
144 use std::str;
145 
146 use std::convert::TryFrom;
147 
148 pub use crate::host::Host;
149 pub use crate::origin::{OpaqueOrigin, Origin};
150 pub use crate::parser::{ParseError, SyntaxViolation};
151 pub use crate::path_segments::PathSegmentsMut;
152 pub use crate::slicing::Position;
153 pub use form_urlencoded::EncodingOverride;
154 
155 mod host;
156 mod origin;
157 mod parser;
158 mod path_segments;
159 mod slicing;
160 
161 #[doc(hidden)]
162 pub mod quirks;
163 
/// A parsed URL record.
///
/// The complete URL is stored once, as a serialized string. The remaining
/// fields are byte offsets into that string (plus the parsed host kind and
/// port) marking where the individual components begin or end, so component
/// accessors can return slices of the serialization.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components (all offsets are byte indices into `serialization`)
    scheme_end: u32,   // Before ':'
    username_end: u32, // Before ':' (if a password is given) or '@' (if not)
    // `host_start..host_end` is the span of the serialized host.
    host_start: u32,
    host_end: u32,
    // Kind of host that was parsed; `HostInternal::None` when there is no host.
    host: HostInternal,
    // Port number written in the serialization (after the ':' that follows the host), if any.
    port: Option<u16>,
    path_start: u32,             // Before initial '/', if any
    query_start: Option<u32>,    // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
}
189 
/// Full configuration for the URL parser.
///
/// Values are built with `Url::options()` and refined with the by-value
/// setter methods before calling `parse`. The struct is `Copy`, so a
/// configured value can be reused for several parses.
#[derive(Copy, Clone)]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; `None` means no base URL is supplied.
    base_url: Option<&'a Url>,
    // Optional encoding override, passed to the parser as `query_encoding_override`.
    encoding_override: EncodingOverride<'a>,
    // Optional callback handed to the parser for reporting `SyntaxViolation`s.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
197 
198 impl<'a> ParseOptions<'a> {
199     /// Change the base URL
base_url(mut self, new: Option<&'a Url>) -> Self200     pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
201         self.base_url = new;
202         self
203     }
204 
205     /// Override the character encoding of query strings.
206     /// This is a legacy concept only relevant for HTML.
encoding_override(mut self, new: EncodingOverride<'a>) -> Self207     pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
208         self.encoding_override = new;
209         self
210     }
211 
212     /// Call the provided function or closure for a non-fatal `SyntaxViolation`
213     /// when it occurs during parsing. Note that since the provided function is
214     /// `Fn`, the caller might need to utilize _interior mutability_, such as with
215     /// a `RefCell`, to collect the violations.
216     ///
217     /// ## Example
218     /// ```
219     /// use std::cell::RefCell;
220     /// use url::{Url, SyntaxViolation};
221     /// # use url::ParseError;
222     /// # fn run() -> Result<(), url::ParseError> {
223     /// let violations = RefCell::new(Vec::new());
224     /// let url = Url::options()
225     ///     .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
226     ///     .parse("https:////example.com")?;
227     /// assert_eq!(url.as_str(), "https://example.com/");
228     /// assert_eq!(violations.into_inner(),
229     ///            vec!(SyntaxViolation::ExpectedDoubleSlash));
230     /// # Ok(())
231     /// # }
232     /// # run().unwrap();
233     /// ```
syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self234     pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
235         self.violation_fn = new;
236         self
237     }
238 
239     /// Parse an URL string with the configuration so far.
parse(self, input: &str) -> Result<Url, crate::ParseError>240     pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
241         Parser {
242             serialization: String::with_capacity(input.len()),
243             base_url: self.base_url,
244             query_encoding_override: self.encoding_override,
245             violation_fn: self.violation_fn,
246             context: Context::UrlParser,
247         }
248         .parse_url(input)
249     }
250 }
251 
252 impl Url {
253     /// Parse an absolute URL from a string.
254     ///
255     /// # Examples
256     ///
257     /// ```rust
258     /// use url::Url;
259     /// # use url::ParseError;
260     ///
261     /// # fn run() -> Result<(), ParseError> {
262     /// let url = Url::parse("https://example.net")?;
263     /// # Ok(())
264     /// # }
265     /// # run().unwrap();
266     /// ```
267     ///
268     /// # Errors
269     ///
270     /// If the function can not parse an absolute URL from the given string,
271     /// a [`ParseError`] variant will be returned.
272     ///
273     /// [`ParseError`]: enum.ParseError.html
274     #[inline]
parse(input: &str) -> Result<Url, crate::ParseError>275     pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
276         Url::options().parse(input)
277     }
278 
279     /// Parse an absolute URL from a string and add params to its query string.
280     ///
281     /// Existing params are not removed.
282     ///
283     /// # Examples
284     ///
285     /// ```rust
286     /// use url::Url;
287     /// # use url::ParseError;
288     ///
289     /// # fn run() -> Result<(), ParseError> {
290     /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
291     ///                                  &[("lang", "rust"), ("browser", "servo")])?;
292     /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
293     /// # Ok(())
294     /// # }
295     /// # run().unwrap();
296     /// ```
297     ///
298     /// # Errors
299     ///
300     /// If the function can not parse an absolute URL from the given string,
301     /// a [`ParseError`] variant will be returned.
302     ///
303     /// [`ParseError`]: enum.ParseError.html
304     #[inline]
parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError> where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>,305     pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
306     where
307         I: IntoIterator,
308         I::Item: Borrow<(K, V)>,
309         K: AsRef<str>,
310         V: AsRef<str>,
311     {
312         let mut url = Url::options().parse(input);
313 
314         if let Ok(ref mut url) = url {
315             url.query_pairs_mut().extend_pairs(iter);
316         }
317 
318         url
319     }
320 
321     /// Parse a string as an URL, with this URL as the base URL.
322     ///
323     /// Note: a trailing slash is significant.
324     /// Without it, the last path component is considered to be a “file” name
325     /// to be removed to get at the “directory” that is used as the base:
326     ///
327     /// # Examples
328     ///
329     /// ```rust
330     /// use url::Url;
331     /// # use url::ParseError;
332     ///
333     /// # fn run() -> Result<(), ParseError> {
334     /// let base = Url::parse("https://example.net/a/b.html")?;
335     /// let url = base.join("c.png")?;
336     /// assert_eq!(url.as_str(), "https://example.net/a/c.png");  // Not /a/b.html/c.png
337     ///
338     /// let base = Url::parse("https://example.net/a/b/")?;
339     /// let url = base.join("c.png")?;
340     /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
341     /// # Ok(())
342     /// # }
343     /// # run().unwrap();
344     /// ```
345     ///
346     /// # Errors
347     ///
348     /// If the function can not parse an URL from the given string
349     /// with this URL as the base URL, a [`ParseError`] variant will be returned.
350     ///
351     /// [`ParseError`]: enum.ParseError.html
352     #[inline]
join(&self, input: &str) -> Result<Url, crate::ParseError>353     pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
354         Url::options().base_url(Some(self)).parse(input)
355     }
356 
357     /// Return a default `ParseOptions` that can fully configure the URL parser.
358     ///
359     /// # Examples
360     ///
361     /// Get default `ParseOptions`, then change base url
362     ///
363     /// ```rust
364     /// use url::Url;
365     /// # use url::ParseError;
366     /// # fn run() -> Result<(), ParseError> {
367     /// let options = Url::options();
368     /// let api = Url::parse("https://api.example.com")?;
369     /// let base_url = options.base_url(Some(&api));
370     /// let version_url = base_url.parse("version.json")?;
371     /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
372     /// # Ok(())
373     /// # }
374     /// # run().unwrap();
375     /// ```
options<'a>() -> ParseOptions<'a>376     pub fn options<'a>() -> ParseOptions<'a> {
377         ParseOptions {
378             base_url: None,
379             encoding_override: None,
380             violation_fn: None,
381         }
382     }
383 
384     /// Return the serialization of this URL.
385     ///
386     /// This is fast since that serialization is already stored in the `Url` struct.
387     ///
388     /// # Examples
389     ///
390     /// ```rust
391     /// use url::Url;
392     /// # use url::ParseError;
393     ///
394     /// # fn run() -> Result<(), ParseError> {
395     /// let url_str = "https://example.net/";
396     /// let url = Url::parse(url_str)?;
397     /// assert_eq!(url.as_str(), url_str);
398     /// # Ok(())
399     /// # }
400     /// # run().unwrap();
401     /// ```
402     #[inline]
as_str(&self) -> &str403     pub fn as_str(&self) -> &str {
404         &self.serialization
405     }
406 
407     /// Return the serialization of this URL.
408     ///
409     /// This consumes the `Url` and takes ownership of the `String` stored in it.
410     ///
411     /// # Examples
412     ///
413     /// ```rust
414     /// use url::Url;
415     /// # use url::ParseError;
416     ///
417     /// # fn run() -> Result<(), ParseError> {
418     /// let url_str = "https://example.net/";
419     /// let url = Url::parse(url_str)?;
420     /// assert_eq!(url.into_string(), url_str);
421     /// # Ok(())
422     /// # }
423     /// # run().unwrap();
424     /// ```
425     #[inline]
into_string(self) -> String426     pub fn into_string(self) -> String {
427         self.serialization
428     }
429 
430     /// For internal testing, not part of the public API.
431     ///
432     /// Methods of the `Url` struct assume a number of invariants.
433     /// This checks each of these invariants and panic if one is not met.
434     /// This is for testing rust-url itself.
435     #[doc(hidden)]
check_invariants(&self) -> Result<(), String>436     pub fn check_invariants(&self) -> Result<(), String> {
437         macro_rules! assert {
438             ($x: expr) => {
439                 if !$x {
440                     return Err(format!(
441                         "!( {} ) for URL {:?}",
442                         stringify!($x),
443                         self.serialization
444                     ));
445                 }
446             };
447         }
448 
449         macro_rules! assert_eq {
450             ($a: expr, $b: expr) => {
451                 {
452                     let a = $a;
453                     let b = $b;
454                     if a != b {
455                         return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
456                                            a, b, stringify!($a), stringify!($b),
457                                            self.serialization))
458                     }
459                 }
460             }
461         }
462 
463         assert!(self.scheme_end >= 1);
464         assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z'));
465         assert!(self
466             .slice(1..self.scheme_end)
467             .chars()
468             .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
469         assert_eq!(self.byte_at(self.scheme_end), b':');
470 
471         if self.slice(self.scheme_end + 1..).starts_with("//") {
472             // URL with authority
473             if self.username_end != self.serialization.len() as u32 {
474                 match self.byte_at(self.username_end) {
475                     b':' => {
476                         assert!(self.host_start >= self.username_end + 2);
477                         assert_eq!(self.byte_at(self.host_start - 1), b'@');
478                     }
479                     b'@' => assert!(self.host_start == self.username_end + 1),
480                     _ => assert_eq!(self.username_end, self.scheme_end + 3),
481                 }
482             }
483             assert!(self.host_start >= self.username_end);
484             assert!(self.host_end >= self.host_start);
485             let host_str = self.slice(self.host_start..self.host_end);
486             match self.host {
487                 HostInternal::None => assert_eq!(host_str, ""),
488                 HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
489                 HostInternal::Ipv6(address) => {
490                     let h: Host<String> = Host::Ipv6(address);
491                     assert_eq!(host_str, h.to_string())
492                 }
493                 HostInternal::Domain => {
494                     if SchemeType::from(self.scheme()).is_special() {
495                         assert!(!host_str.is_empty())
496                     }
497                 }
498             }
499             if self.path_start == self.host_end {
500                 assert_eq!(self.port, None);
501             } else {
502                 assert_eq!(self.byte_at(self.host_end), b':');
503                 let port_str = self.slice(self.host_end + 1..self.path_start);
504                 assert_eq!(
505                     self.port,
506                     Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
507                 );
508             }
509             assert!(
510                 self.path_start as usize == self.serialization.len()
511                     || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
512             );
513         } else {
514             // Anarchist URL (no authority)
515             assert_eq!(self.username_end, self.scheme_end + 1);
516             assert_eq!(self.host_start, self.scheme_end + 1);
517             assert_eq!(self.host_end, self.scheme_end + 1);
518             assert_eq!(self.host, HostInternal::None);
519             assert_eq!(self.port, None);
520             assert_eq!(self.path_start, self.scheme_end + 1);
521         }
522         if let Some(start) = self.query_start {
523             assert!(start >= self.path_start);
524             assert_eq!(self.byte_at(start), b'?');
525         }
526         if let Some(start) = self.fragment_start {
527             assert!(start >= self.path_start);
528             assert_eq!(self.byte_at(start), b'#');
529         }
530         if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
531             assert!(fragment_start > query_start);
532         }
533 
534         let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
535         assert_eq!(&self.serialization, &other.serialization);
536         assert_eq!(self.scheme_end, other.scheme_end);
537         assert_eq!(self.username_end, other.username_end);
538         assert_eq!(self.host_start, other.host_start);
539         assert_eq!(self.host_end, other.host_end);
540         assert!(
541             self.host == other.host ||
542                 // XXX No host round-trips to empty host.
543                 // See https://github.com/whatwg/url/issues/79
544                 (self.host_str(), other.host_str()) == (None, Some(""))
545         );
546         assert_eq!(self.port, other.port);
547         assert_eq!(self.path_start, other.path_start);
548         assert_eq!(self.query_start, other.query_start);
549         assert_eq!(self.fragment_start, other.fragment_start);
550         Ok(())
551     }
552 
553     /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
554     ///
555     /// Note: this returns an opaque origin for `file:` URLs, which causes
556     /// `url.origin() != url.origin()`.
557     ///
558     /// # Examples
559     ///
560     /// URL with `ftp` scheme:
561     ///
562     /// ```rust
563     /// use url::{Host, Origin, Url};
564     /// # use url::ParseError;
565     ///
566     /// # fn run() -> Result<(), ParseError> {
567     /// let url = Url::parse("ftp://example.com/foo")?;
568     /// assert_eq!(url.origin(),
569     ///            Origin::Tuple("ftp".into(),
570     ///                          Host::Domain("example.com".into()),
571     ///                          21));
572     /// # Ok(())
573     /// # }
574     /// # run().unwrap();
575     /// ```
576     ///
577     /// URL with `blob` scheme:
578     ///
579     /// ```rust
580     /// use url::{Host, Origin, Url};
581     /// # use url::ParseError;
582     ///
583     /// # fn run() -> Result<(), ParseError> {
584     /// let url = Url::parse("blob:https://example.com/foo")?;
585     /// assert_eq!(url.origin(),
586     ///            Origin::Tuple("https".into(),
587     ///                          Host::Domain("example.com".into()),
588     ///                          443));
589     /// # Ok(())
590     /// # }
591     /// # run().unwrap();
592     /// ```
593     ///
594     /// URL with `file` scheme:
595     ///
596     /// ```rust
597     /// use url::{Host, Origin, Url};
598     /// # use url::ParseError;
599     ///
600     /// # fn run() -> Result<(), ParseError> {
601     /// let url = Url::parse("file:///tmp/foo")?;
602     /// assert!(!url.origin().is_tuple());
603     ///
604     /// let other_url = Url::parse("file:///tmp/foo")?;
605     /// assert!(url.origin() != other_url.origin());
606     /// # Ok(())
607     /// # }
608     /// # run().unwrap();
609     /// ```
610     ///
611     /// URL with other scheme:
612     ///
613     /// ```rust
614     /// use url::{Host, Origin, Url};
615     /// # use url::ParseError;
616     ///
617     /// # fn run() -> Result<(), ParseError> {
618     /// let url = Url::parse("foo:bar")?;
619     /// assert!(!url.origin().is_tuple());
620     /// # Ok(())
621     /// # }
622     /// # run().unwrap();
623     /// ```
624     #[inline]
origin(&self) -> Origin625     pub fn origin(&self) -> Origin {
626         origin::url_origin(self)
627     }
628 
629     /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
630     ///
631     /// # Examples
632     ///
633     /// ```
634     /// use url::Url;
635     /// # use url::ParseError;
636     ///
637     /// # fn run() -> Result<(), ParseError> {
638     /// let url = Url::parse("file:///tmp/foo")?;
639     /// assert_eq!(url.scheme(), "file");
640     /// # Ok(())
641     /// # }
642     /// # run().unwrap();
643     /// ```
644     #[inline]
scheme(&self) -> &str645     pub fn scheme(&self) -> &str {
646         self.slice(..self.scheme_end)
647     }
648 
649     /// Return whether the URL has an 'authority',
650     /// which can contain a username, password, host, and port number.
651     ///
652     /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
653     /// or cannot-be-a-base like `data:text/plain,Stuff`.
654     ///
655     /// # Examples
656     ///
657     /// ```
658     /// use url::Url;
659     /// # use url::ParseError;
660     ///
661     /// # fn run() -> Result<(), ParseError> {
662     /// let url = Url::parse("ftp://rms@example.com")?;
663     /// assert!(url.has_authority());
664     ///
665     /// let url = Url::parse("unix:/run/foo.socket")?;
666     /// assert!(!url.has_authority());
667     ///
668     /// let url = Url::parse("data:text/plain,Stuff")?;
669     /// assert!(!url.has_authority());
670     /// # Ok(())
671     /// # }
672     /// # run().unwrap();
673     /// ```
674     #[inline]
has_authority(&self) -> bool675     pub fn has_authority(&self) -> bool {
676         debug_assert!(self.byte_at(self.scheme_end) == b':');
677         self.slice(self.scheme_end..).starts_with("://")
678     }
679 
680     /// Return whether this URL is a cannot-be-a-base URL,
681     /// meaning that parsing a relative URL string with this URL as the base will return an error.
682     ///
683     /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
684     /// as is typically the case of `data:` and `mailto:` URLs.
685     ///
686     /// # Examples
687     ///
688     /// ```
689     /// use url::Url;
690     /// # use url::ParseError;
691     ///
692     /// # fn run() -> Result<(), ParseError> {
693     /// let url = Url::parse("ftp://rms@example.com")?;
694     /// assert!(!url.cannot_be_a_base());
695     ///
696     /// let url = Url::parse("unix:/run/foo.socket")?;
697     /// assert!(!url.cannot_be_a_base());
698     ///
699     /// let url = Url::parse("data:text/plain,Stuff")?;
700     /// assert!(url.cannot_be_a_base());
701     /// # Ok(())
702     /// # }
703     /// # run().unwrap();
704     /// ```
705     #[inline]
cannot_be_a_base(&self) -> bool706     pub fn cannot_be_a_base(&self) -> bool {
707         !self.slice(self.scheme_end + 1..).starts_with('/')
708     }
709 
710     /// Return the username for this URL (typically the empty string)
711     /// as a percent-encoded ASCII string.
712     ///
713     /// # Examples
714     ///
715     /// ```
716     /// use url::Url;
717     /// # use url::ParseError;
718     ///
719     /// # fn run() -> Result<(), ParseError> {
720     /// let url = Url::parse("ftp://rms@example.com")?;
721     /// assert_eq!(url.username(), "rms");
722     ///
723     /// let url = Url::parse("ftp://:secret123@example.com")?;
724     /// assert_eq!(url.username(), "");
725     ///
726     /// let url = Url::parse("https://example.com")?;
727     /// assert_eq!(url.username(), "");
728     /// # Ok(())
729     /// # }
730     /// # run().unwrap();
731     /// ```
username(&self) -> &str732     pub fn username(&self) -> &str {
733         let scheme_separator_len = "://".len() as u32;
734         if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
735             self.slice(self.scheme_end + scheme_separator_len..self.username_end)
736         } else {
737             ""
738         }
739     }
740 
741     /// Return the password for this URL, if any, as a percent-encoded ASCII string.
742     ///
743     /// # Examples
744     ///
745     /// ```
746     /// use url::Url;
747     /// # use url::ParseError;
748     ///
749     /// # fn run() -> Result<(), ParseError> {
750     /// let url = Url::parse("ftp://rms:secret123@example.com")?;
751     /// assert_eq!(url.password(), Some("secret123"));
752     ///
753     /// let url = Url::parse("ftp://:secret123@example.com")?;
754     /// assert_eq!(url.password(), Some("secret123"));
755     ///
756     /// let url = Url::parse("ftp://rms@example.com")?;
757     /// assert_eq!(url.password(), None);
758     ///
759     /// let url = Url::parse("https://example.com")?;
760     /// assert_eq!(url.password(), None);
761     /// # Ok(())
762     /// # }
763     /// # run().unwrap();
764     /// ```
password(&self) -> Option<&str>765     pub fn password(&self) -> Option<&str> {
766         // This ':' is not the one marking a port number since a host can not be empty.
767         // (Except for file: URLs, which do not have port numbers.)
768         if self.has_authority()
769             && self.username_end != self.serialization.len() as u32
770             && self.byte_at(self.username_end) == b':'
771         {
772             debug_assert!(self.byte_at(self.host_start - 1) == b'@');
773             Some(self.slice(self.username_end + 1..self.host_start - 1))
774         } else {
775             None
776         }
777     }
778 
779     /// Equivalent to `url.host().is_some()`.
780     ///
781     /// # Examples
782     ///
783     /// ```
784     /// use url::Url;
785     /// # use url::ParseError;
786     ///
787     /// # fn run() -> Result<(), ParseError> {
788     /// let url = Url::parse("ftp://rms@example.com")?;
789     /// assert!(url.has_host());
790     ///
791     /// let url = Url::parse("unix:/run/foo.socket")?;
792     /// assert!(!url.has_host());
793     ///
794     /// let url = Url::parse("data:text/plain,Stuff")?;
795     /// assert!(!url.has_host());
796     /// # Ok(())
797     /// # }
798     /// # run().unwrap();
799     /// ```
has_host(&self) -> bool800     pub fn has_host(&self) -> bool {
801         !matches!(self.host, HostInternal::None)
802     }
803 
804     /// Return the string representation of the host (domain or IP address) for this URL, if any.
805     ///
806     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
807     /// of a special URL, or percent encoded for non-special URLs.
808     /// IPv6 addresses are given between `[` and `]` brackets.
809     ///
810     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
811     /// don’t have a host.
812     ///
813     /// See also the `host` method.
814     ///
815     /// # Examples
816     ///
817     /// ```
818     /// use url::Url;
819     /// # use url::ParseError;
820     ///
821     /// # fn run() -> Result<(), ParseError> {
822     /// let url = Url::parse("https://127.0.0.1/index.html")?;
823     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
824     ///
825     /// let url = Url::parse("ftp://rms@example.com")?;
826     /// assert_eq!(url.host_str(), Some("example.com"));
827     ///
828     /// let url = Url::parse("unix:/run/foo.socket")?;
829     /// assert_eq!(url.host_str(), None);
830     ///
831     /// let url = Url::parse("data:text/plain,Stuff")?;
832     /// assert_eq!(url.host_str(), None);
833     /// # Ok(())
834     /// # }
835     /// # run().unwrap();
836     /// ```
host_str(&self) -> Option<&str>837     pub fn host_str(&self) -> Option<&str> {
838         if self.has_host() {
839             Some(self.slice(self.host_start..self.host_end))
840         } else {
841             None
842         }
843     }
844 
845     /// Return the parsed representation of the host for this URL.
846     /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
847     /// of a special URL, or percent encoded for non-special URLs.
848     ///
849     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
850     /// don’t have a host.
851     ///
852     /// See also the `host_str` method.
853     ///
854     /// # Examples
855     ///
856     /// ```
857     /// use url::Url;
858     /// # use url::ParseError;
859     ///
860     /// # fn run() -> Result<(), ParseError> {
861     /// let url = Url::parse("https://127.0.0.1/index.html")?;
862     /// assert!(url.host().is_some());
863     ///
864     /// let url = Url::parse("ftp://rms@example.com")?;
865     /// assert!(url.host().is_some());
866     ///
867     /// let url = Url::parse("unix:/run/foo.socket")?;
868     /// assert!(url.host().is_none());
869     ///
870     /// let url = Url::parse("data:text/plain,Stuff")?;
871     /// assert!(url.host().is_none());
872     /// # Ok(())
873     /// # }
874     /// # run().unwrap();
875     /// ```
host(&self) -> Option<Host<&str>>876     pub fn host(&self) -> Option<Host<&str>> {
877         match self.host {
878             HostInternal::None => None,
879             HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
880             HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
881             HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
882         }
883     }
884 
885     /// If this URL has a host and it is a domain name (not an IP address), return it.
886     /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
887     /// of a special URL, or percent encoded for non-special URLs.
888     ///
889     /// # Examples
890     ///
891     /// ```
892     /// use url::Url;
893     /// # use url::ParseError;
894     ///
895     /// # fn run() -> Result<(), ParseError> {
896     /// let url = Url::parse("https://127.0.0.1/")?;
897     /// assert_eq!(url.domain(), None);
898     ///
899     /// let url = Url::parse("mailto:rms@example.net")?;
900     /// assert_eq!(url.domain(), None);
901     ///
902     /// let url = Url::parse("https://example.com/")?;
903     /// assert_eq!(url.domain(), Some("example.com"));
904     /// # Ok(())
905     /// # }
906     /// # run().unwrap();
907     /// ```
domain(&self) -> Option<&str>908     pub fn domain(&self) -> Option<&str> {
909         match self.host {
910             HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
911             _ => None,
912         }
913     }
914 
915     /// Return the port number for this URL, if any.
916     ///
917     /// Note that default port numbers are never reflected by the serialization,
918     /// use the `port_or_known_default()` method if you want a default port number returned.
919     ///
920     /// # Examples
921     ///
922     /// ```
923     /// use url::Url;
924     /// # use url::ParseError;
925     ///
926     /// # fn run() -> Result<(), ParseError> {
927     /// let url = Url::parse("https://example.com")?;
928     /// assert_eq!(url.port(), None);
929     ///
930     /// let url = Url::parse("https://example.com:443/")?;
931     /// assert_eq!(url.port(), None);
932     ///
933     /// let url = Url::parse("ssh://example.com:22")?;
934     /// assert_eq!(url.port(), Some(22));
935     /// # Ok(())
936     /// # }
937     /// # run().unwrap();
938     /// ```
    #[inline]
    pub fn port(&self) -> Option<u16> {
        // Plain field read. `set_port` stores `None` when asked for the scheme's
        // known default port, so a default port set that way is never reported here.
        self.port
    }
943 
944     /// Return the port number for this URL, or the default port number if it is known.
945     ///
946     /// This method only knows the default port number
947     /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
948     ///
949     /// For URLs in these schemes, this method always returns `Some(_)`.
950     /// For other schemes, it is the same as `Url::port()`.
951     ///
952     /// # Examples
953     ///
954     /// ```
955     /// use url::Url;
956     /// # use url::ParseError;
957     ///
958     /// # fn run() -> Result<(), ParseError> {
959     /// let url = Url::parse("foo://example.com")?;
960     /// assert_eq!(url.port_or_known_default(), None);
961     ///
962     /// let url = Url::parse("foo://example.com:1456")?;
963     /// assert_eq!(url.port_or_known_default(), Some(1456));
964     ///
965     /// let url = Url::parse("https://example.com")?;
966     /// assert_eq!(url.port_or_known_default(), Some(443));
967     /// # Ok(())
968     /// # }
969     /// # run().unwrap();
970     /// ```
971     #[inline]
port_or_known_default(&self) -> Option<u16>972     pub fn port_or_known_default(&self) -> Option<u16> {
973         self.port.or_else(|| parser::default_port(self.scheme()))
974     }
975 
976     /// Resolve a URL’s host and port number to `SocketAddr`.
977     ///
978     /// If the URL has the default port number of a scheme that is unknown to this library,
979     /// `default_port_number` provides an opportunity to provide the actual port number.
980     /// In non-example code this should be implemented either simply as `|| None`,
981     /// or by matching on the URL’s `.scheme()`.
982     ///
983     /// If the host is a domain, it is resolved using the standard library’s DNS support.
984     ///
985     /// # Examples
986     ///
987     /// ```no_run
988     /// let url = url::Url::parse("https://example.net/").unwrap();
989     /// let addrs = url.socket_addrs(|| None).unwrap();
990     /// std::net::TcpStream::connect(&*addrs)
991     /// # ;
992     /// ```
993     ///
994     /// ```
995     /// /// With application-specific known default port numbers
996     /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
997     ///     url.socket_addrs(|| match url.scheme() {
998     ///         "socks5" | "socks5h" => Some(1080),
999     ///         _ => None,
1000     ///     })
1001     /// }
1002     /// ```
socket_addrs( &self, default_port_number: impl Fn() -> Option<u16>, ) -> io::Result<Vec<SocketAddr>>1003     pub fn socket_addrs(
1004         &self,
1005         default_port_number: impl Fn() -> Option<u16>,
1006     ) -> io::Result<Vec<SocketAddr>> {
1007         // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1008         // causes borrowck issues because the return value borrows `default_port_number`:
1009         //
1010         // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1011         //
1012         // > This RFC proposes that *all* type parameters are considered in scope
1013         // > for `impl Trait` in return position
1014 
1015         fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1016             opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1017         }
1018 
1019         let host = io_result(self.host(), "No host name in the URL")?;
1020         let port = io_result(
1021             self.port_or_known_default().or_else(default_port_number),
1022             "No port number in the URL",
1023         )?;
1024         Ok(match host {
1025             Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1026             Host::Ipv4(ip) => vec![(ip, port).into()],
1027             Host::Ipv6(ip) => vec![(ip, port).into()],
1028         })
1029     }
1030 
1031     /// Return the path for this URL, as a percent-encoded ASCII string.
1032     /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1033     /// For other URLs, this starts with a '/' slash
1034     /// and continues with slash-separated path segments.
1035     ///
1036     /// # Examples
1037     ///
1038     /// ```rust
1039     /// use url::{Url, ParseError};
1040     ///
1041     /// # fn run() -> Result<(), ParseError> {
1042     /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1043     /// assert_eq!(url.path(), "/api/versions");
1044     ///
1045     /// let url = Url::parse("https://example.com")?;
1046     /// assert_eq!(url.path(), "/");
1047     ///
1048     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1049     /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1050     /// # Ok(())
1051     /// # }
1052     /// # run().unwrap();
1053     /// ```
path(&self) -> &str1054     pub fn path(&self) -> &str {
1055         match (self.query_start, self.fragment_start) {
1056             (None, None) => self.slice(self.path_start..),
1057             (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1058                 self.slice(self.path_start..next_component_start)
1059             }
1060         }
1061     }
1062 
1063     /// Unless this URL is cannot-be-a-base,
1064     /// return an iterator of '/' slash-separated path segments,
1065     /// each as a percent-encoded ASCII string.
1066     ///
1067     /// Return `None` for cannot-be-a-base URLs.
1068     ///
1069     /// When `Some` is returned, the iterator always contains at least one string
1070     /// (which may be empty).
1071     ///
1072     /// # Examples
1073     ///
1074     /// ```
1075     /// use url::Url;
1076     /// # use std::error::Error;
1077     ///
1078     /// # fn run() -> Result<(), Box<dyn Error>> {
1079     /// let url = Url::parse("https://example.com/foo/bar")?;
1080     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1081     /// assert_eq!(path_segments.next(), Some("foo"));
1082     /// assert_eq!(path_segments.next(), Some("bar"));
1083     /// assert_eq!(path_segments.next(), None);
1084     ///
1085     /// let url = Url::parse("https://example.com")?;
1086     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1087     /// assert_eq!(path_segments.next(), Some(""));
1088     /// assert_eq!(path_segments.next(), None);
1089     ///
1090     /// let url = Url::parse("data:text/plain,HelloWorld")?;
1091     /// assert!(url.path_segments().is_none());
1092     ///
1093     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1094     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1095     /// assert_eq!(path_segments.next(), Some("countries"));
1096     /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1097     /// # Ok(())
1098     /// # }
1099     /// # run().unwrap();
1100     /// ```
1101     #[allow(clippy::manual_strip)] // introduced in 1.45, MSRV is 1.36
path_segments(&self) -> Option<str::Split<'_, char>>1102     pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1103         let path = self.path();
1104         if path.starts_with('/') {
1105             Some(path[1..].split('/'))
1106         } else {
1107             None
1108         }
1109     }
1110 
1111     /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1112     ///
1113     /// # Examples
1114     ///
1115     /// ```rust
1116     /// use url::Url;
1117     /// # use url::ParseError;
1118     ///
    /// # fn run() -> Result<(), ParseError> {
1120     /// let url = Url::parse("https://example.com/products?page=2")?;
1121     /// let query = url.query();
1122     /// assert_eq!(query, Some("page=2"));
1123     ///
1124     /// let url = Url::parse("https://example.com/products")?;
1125     /// let query = url.query();
1126     /// assert!(query.is_none());
1127     ///
1128     /// let url = Url::parse("https://example.com/?country=español")?;
1129     /// let query = url.query();
1130     /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1131     /// # Ok(())
1132     /// # }
1133     /// # run().unwrap();
1134     /// ```
query(&self) -> Option<&str>1135     pub fn query(&self) -> Option<&str> {
1136         match (self.query_start, self.fragment_start) {
1137             (None, _) => None,
1138             (Some(query_start), None) => {
1139                 debug_assert!(self.byte_at(query_start) == b'?');
1140                 Some(self.slice(query_start + 1..))
1141             }
1142             (Some(query_start), Some(fragment_start)) => {
1143                 debug_assert!(self.byte_at(query_start) == b'?');
1144                 Some(self.slice(query_start + 1..fragment_start))
1145             }
1146         }
1147     }
1148 
1149     /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1150     /// and return an iterator of (key, value) pairs.
1151     ///
1152     /// # Examples
1153     ///
1154     /// ```rust
1155     /// use std::borrow::Cow;
1156     ///
1157     /// use url::Url;
1158     /// # use url::ParseError;
1159     ///
1160     /// # fn run() -> Result<(), ParseError> {
1161     /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1162     /// let mut pairs = url.query_pairs();
1163     ///
1164     /// assert_eq!(pairs.count(), 2);
1165     ///
1166     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1167     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1168     /// # Ok(())
1169     /// # }
1170     /// # run().unwrap();
    /// ```
1173     #[inline]
query_pairs(&self) -> form_urlencoded::Parse<'_>1174     pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1175         form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1176     }
1177 
1178     /// Return this URL’s fragment identifier, if any.
1179     ///
1180     /// A fragment is the part of the URL after the `#` symbol.
1181     /// The fragment is optional and, if present, contains a fragment identifier
1182     /// that identifies a secondary resource, such as a section heading
1183     /// of a document.
1184     ///
    /// In HTML, the fragment identifier is usually the id attribute of an element
1186     /// that is scrolled to on load. Browsers typically will not send the fragment portion
1187     /// of a URL to the server.
1188     ///
1189     /// **Note:** the parser did *not* percent-encode this component,
1190     /// but the input may have been percent-encoded already.
1191     ///
1192     /// # Examples
1193     ///
1194     /// ```rust
1195     /// use url::Url;
1196     /// # use url::ParseError;
1197     ///
1198     /// # fn run() -> Result<(), ParseError> {
1199     /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1200     ///
1201     /// assert_eq!(url.fragment(), Some("row=4"));
1202     ///
1203     /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1204     ///
1205     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1206     /// # Ok(())
1207     /// # }
1208     /// # run().unwrap();
1209     /// ```
fragment(&self) -> Option<&str>1210     pub fn fragment(&self) -> Option<&str> {
1211         self.fragment_start.map(|start| {
1212             debug_assert!(self.byte_at(start) == b'#');
1213             self.slice(start + 1..)
1214         })
1215     }
1216 
mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R1217     fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1218         let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new()));
1219         let result = f(&mut parser);
1220         self.serialization = parser.serialization;
1221         result
1222     }
1223 
1224     /// Change this URL’s fragment identifier.
1225     ///
1226     /// # Examples
1227     ///
1228     /// ```rust
1229     /// use url::Url;
1230     /// # use url::ParseError;
1231     ///
1232     /// # fn run() -> Result<(), ParseError> {
1233     /// let mut url = Url::parse("https://example.com/data.csv")?;
1234     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
    ///
1236     /// url.set_fragment(Some("cell=4,1-6,2"));
1237     /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1238     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1239     ///
1240     /// url.set_fragment(None);
1241     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1242     /// assert!(url.fragment().is_none());
1243     /// # Ok(())
1244     /// # }
1245     /// # run().unwrap();
1246     /// ```
set_fragment(&mut self, fragment: Option<&str>)1247     pub fn set_fragment(&mut self, fragment: Option<&str>) {
1248         // Remove any previous fragment
1249         if let Some(start) = self.fragment_start {
1250             debug_assert!(self.byte_at(start) == b'#');
1251             self.serialization.truncate(start as usize);
1252         }
1253         // Write the new one
1254         if let Some(input) = fragment {
1255             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1256             self.serialization.push('#');
1257             self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input)))
1258         } else {
1259             self.fragment_start = None
1260         }
1261     }
1262 
take_fragment(&mut self) -> Option<String>1263     fn take_fragment(&mut self) -> Option<String> {
1264         self.fragment_start.take().map(|start| {
1265             debug_assert!(self.byte_at(start) == b'#');
1266             let fragment = self.slice(start + 1..).to_owned();
1267             self.serialization.truncate(start as usize);
1268             fragment
1269         })
1270     }
1271 
restore_already_parsed_fragment(&mut self, fragment: Option<String>)1272     fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1273         if let Some(ref fragment) = fragment {
1274             assert!(self.fragment_start.is_none());
1275             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1276             self.serialization.push('#');
1277             self.serialization.push_str(fragment);
1278         }
1279     }
1280 
1281     /// Change this URL’s query string.
1282     ///
1283     /// # Examples
1284     ///
1285     /// ```rust
1286     /// use url::Url;
1287     /// # use url::ParseError;
1288     ///
1289     /// # fn run() -> Result<(), ParseError> {
1290     /// let mut url = Url::parse("https://example.com/products")?;
1291     /// assert_eq!(url.as_str(), "https://example.com/products");
1292     ///
1293     /// url.set_query(Some("page=2"));
1294     /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1295     /// assert_eq!(url.query(), Some("page=2"));
1296     /// # Ok(())
1297     /// # }
1298     /// # run().unwrap();
1299     /// ```
set_query(&mut self, query: Option<&str>)1300     pub fn set_query(&mut self, query: Option<&str>) {
1301         let fragment = self.take_fragment();
1302 
1303         // Remove any previous query
1304         if let Some(start) = self.query_start.take() {
1305             debug_assert!(self.byte_at(start) == b'?');
1306             self.serialization.truncate(start as usize);
1307         }
1308         // Write the new query, if any
1309         if let Some(input) = query {
1310             self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1311             self.serialization.push('?');
1312             let scheme_type = SchemeType::from(self.scheme());
1313             let scheme_end = self.scheme_end;
1314             self.mutate(|parser| {
1315                 let vfn = parser.violation_fn;
1316                 parser.parse_query(
1317                     scheme_type,
1318                     scheme_end,
1319                     parser::Input::trim_tab_and_newlines(input, vfn),
1320                 )
1321             });
1322         }
1323 
1324         self.restore_already_parsed_fragment(fragment);
1325     }
1326 
1327     /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1328     /// in `application/x-www-form-urlencoded` syntax.
1329     ///
1330     /// The return value has a method-chaining API:
1331     ///
1332     /// ```rust
1333     /// # use url::{Url, ParseError};
1334     ///
1335     /// # fn run() -> Result<(), ParseError> {
1336     /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1337     /// assert_eq!(url.query(), Some("lang=fr"));
1338     ///
1339     /// url.query_pairs_mut().append_pair("foo", "bar");
1340     /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1341     /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1342     ///
1343     /// url.query_pairs_mut()
1344     ///     .clear()
1345     ///     .append_pair("foo", "bar & baz")
1346     ///     .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1347     /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1348     /// assert_eq!(url.as_str(),
1349     ///            "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1350     /// # Ok(())
1351     /// # }
1352     /// # run().unwrap();
1353     /// ```
1354     ///
1355     /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1356     /// not `url.set_query(None)`.
1357     ///
1358     /// The state of `Url` is unspecified if this return value is leaked without being dropped.
query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>>1359     pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1360         let fragment = self.take_fragment();
1361 
1362         let query_start;
1363         if let Some(start) = self.query_start {
1364             debug_assert!(self.byte_at(start) == b'?');
1365             query_start = start as usize;
1366         } else {
1367             query_start = self.serialization.len();
1368             self.query_start = Some(to_u32(query_start).unwrap());
1369             self.serialization.push('?');
1370         }
1371 
1372         let query = UrlQuery {
1373             url: Some(self),
1374             fragment,
1375         };
1376         form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1377     }
1378 
take_after_path(&mut self) -> String1379     fn take_after_path(&mut self) -> String {
1380         match (self.query_start, self.fragment_start) {
1381             (Some(i), _) | (None, Some(i)) => {
1382                 let after_path = self.slice(i..).to_owned();
1383                 self.serialization.truncate(i as usize);
1384                 after_path
1385             }
1386             (None, None) => String::new(),
1387         }
1388     }
1389 
1390     /// Change this URL’s path.
1391     ///
1392     /// # Examples
1393     ///
1394     /// ```rust
1395     /// use url::Url;
1396     /// # use url::ParseError;
1397     ///
1398     /// # fn run() -> Result<(), ParseError> {
1399     /// let mut url = Url::parse("https://example.com")?;
1400     /// url.set_path("api/comments");
1401     /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1402     /// assert_eq!(url.path(), "/api/comments");
1403     ///
1404     /// let mut url = Url::parse("https://example.com/api")?;
1405     /// url.set_path("data/report.csv");
1406     /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1407     /// assert_eq!(url.path(), "/data/report.csv");
1408     /// # Ok(())
1409     /// # }
1410     /// # run().unwrap();
1411     /// ```
set_path(&mut self, mut path: &str)1412     pub fn set_path(&mut self, mut path: &str) {
1413         let after_path = self.take_after_path();
1414         let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1415         let cannot_be_a_base = self.cannot_be_a_base();
1416         let scheme_type = SchemeType::from(self.scheme());
1417         self.serialization.truncate(self.path_start as usize);
1418         self.mutate(|parser| {
1419             if cannot_be_a_base {
1420                 if path.starts_with('/') {
1421                     parser.serialization.push_str("%2F");
1422                     path = &path[1..];
1423                 }
1424                 parser.parse_cannot_be_a_base_path(parser::Input::new(path));
1425             } else {
1426                 let mut has_host = true; // FIXME
1427                 parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
1428             }
1429         });
1430         self.restore_after_path(old_after_path_pos, &after_path);
1431     }
1432 
1433     /// Return an object with methods to manipulate this URL’s path segments.
1434     ///
1435     /// Return `Err(())` if this URL is cannot-be-a-base.
1436     #[allow(clippy::clippy::result_unit_err)]
path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()>1437     pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1438         if self.cannot_be_a_base() {
1439             Err(())
1440         } else {
1441             Ok(path_segments::new(self))
1442         }
1443     }
1444 
restore_after_path(&mut self, old_after_path_position: u32, after_path: &str)1445     fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1446         let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1447         let adjust = |index: &mut u32| {
1448             *index -= old_after_path_position;
1449             *index += new_after_path_position;
1450         };
1451         if let Some(ref mut index) = self.query_start {
1452             adjust(index)
1453         }
1454         if let Some(ref mut index) = self.fragment_start {
1455             adjust(index)
1456         }
1457         self.serialization.push_str(after_path)
1458     }
1459 
1460     /// Change this URL’s port number.
1461     ///
1462     /// Note that default port numbers are not reflected in the serialization.
1463     ///
    /// If this URL is cannot-be-a-base, does not have a host (or has an empty one),
    /// or has the `file` scheme, do nothing and return `Err`.
1466     ///
1467     /// # Examples
1468     ///
1469     /// ```
1470     /// use url::Url;
1471     /// # use std::error::Error;
1472     ///
1473     /// # fn run() -> Result<(), Box<dyn Error>> {
1474     /// let mut url = Url::parse("ssh://example.net:2048/")?;
1475     ///
1476     /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1477     /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1478     ///
1479     /// url.set_port(None).map_err(|_| "cannot be base")?;
1480     /// assert_eq!(url.as_str(), "ssh://example.net/");
1481     /// # Ok(())
1482     /// # }
1483     /// # run().unwrap();
1484     /// ```
1485     ///
1486     /// Known default port numbers are not reflected:
1487     ///
1488     /// ```rust
1489     /// use url::Url;
1490     /// # use std::error::Error;
1491     ///
1492     /// # fn run() -> Result<(), Box<dyn Error>> {
1493     /// let mut url = Url::parse("https://example.org/")?;
1494     ///
1495     /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1496     /// assert!(url.port().is_none());
1497     /// # Ok(())
1498     /// # }
1499     /// # run().unwrap();
1500     /// ```
1501     ///
1502     /// Cannot set port for cannot-be-a-base URLs:
1503     ///
1504     /// ```
1505     /// use url::Url;
1506     /// # use url::ParseError;
1507     ///
1508     /// # fn run() -> Result<(), ParseError> {
1509     /// let mut url = Url::parse("mailto:rms@example.net")?;
1510     ///
1511     /// let result = url.set_port(Some(80));
1512     /// assert!(result.is_err());
1513     ///
1514     /// let result = url.set_port(None);
1515     /// assert!(result.is_err());
1516     /// # Ok(())
1517     /// # }
1518     /// # run().unwrap();
1519     /// ```
1520     #[allow(clippy::clippy::result_unit_err)]
set_port(&mut self, mut port: Option<u16>) -> Result<(), ()>1521     pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1522         // has_host implies !cannot_be_a_base
1523         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1524             return Err(());
1525         }
1526         if port.is_some() && port == parser::default_port(self.scheme()) {
1527             port = None
1528         }
1529         self.set_port_internal(port);
1530         Ok(())
1531     }
1532 
set_port_internal(&mut self, port: Option<u16>)1533     fn set_port_internal(&mut self, port: Option<u16>) {
1534         match (self.port, port) {
1535             (None, None) => {}
1536             (Some(_), None) => {
1537                 self.serialization
1538                     .drain(self.host_end as usize..self.path_start as usize);
1539                 let offset = self.path_start - self.host_end;
1540                 self.path_start = self.host_end;
1541                 if let Some(ref mut index) = self.query_start {
1542                     *index -= offset
1543                 }
1544                 if let Some(ref mut index) = self.fragment_start {
1545                     *index -= offset
1546                 }
1547             }
1548             (Some(old), Some(new)) if old == new => {}
1549             (_, Some(new)) => {
1550                 let path_and_after = self.slice(self.path_start..).to_owned();
1551                 self.serialization.truncate(self.host_end as usize);
1552                 write!(&mut self.serialization, ":{}", new).unwrap();
1553                 let old_path_start = self.path_start;
1554                 let new_path_start = to_u32(self.serialization.len()).unwrap();
1555                 self.path_start = new_path_start;
1556                 let adjust = |index: &mut u32| {
1557                     *index -= old_path_start;
1558                     *index += new_path_start;
1559                 };
1560                 if let Some(ref mut index) = self.query_start {
1561                     adjust(index)
1562                 }
1563                 if let Some(ref mut index) = self.fragment_start {
1564                     adjust(index)
1565                 }
1566                 self.serialization.push_str(&path_and_after);
1567             }
1568         }
1569         self.port = port;
1570     }
1571 
1572     /// Change this URL’s host.
1573     ///
1574     /// Removing the host (calling this with `None`)
1575     /// will also remove any username, password, and port number.
1576     ///
1577     /// # Examples
1578     ///
1579     /// Change host:
1580     ///
1581     /// ```
1582     /// use url::Url;
1583     /// # use url::ParseError;
1584     ///
1585     /// # fn run() -> Result<(), ParseError> {
1586     /// let mut url = Url::parse("https://example.net")?;
1587     /// let result = url.set_host(Some("rust-lang.org"));
1588     /// assert!(result.is_ok());
1589     /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1590     /// # Ok(())
1591     /// # }
1592     /// # run().unwrap();
1593     /// ```
1594     ///
1595     /// Remove host:
1596     ///
1597     /// ```
1598     /// use url::Url;
1599     /// # use url::ParseError;
1600     ///
1601     /// # fn run() -> Result<(), ParseError> {
1602     /// let mut url = Url::parse("foo://example.net")?;
1603     /// let result = url.set_host(None);
1604     /// assert!(result.is_ok());
1605     /// assert_eq!(url.as_str(), "foo:/");
1606     /// # Ok(())
1607     /// # }
1608     /// # run().unwrap();
1609     /// ```
1610     ///
1611     /// Cannot remove host for 'special' schemes (e.g. `http`):
1612     ///
1613     /// ```
1614     /// use url::Url;
1615     /// # use url::ParseError;
1616     ///
1617     /// # fn run() -> Result<(), ParseError> {
1618     /// let mut url = Url::parse("https://example.net")?;
1619     /// let result = url.set_host(None);
1620     /// assert!(result.is_err());
1621     /// assert_eq!(url.as_str(), "https://example.net/");
1622     /// # Ok(())
1623     /// # }
1624     /// # run().unwrap();
1625     /// ```
1626     ///
1627     /// Cannot change or remove host for cannot-be-a-base URLs:
1628     ///
1629     /// ```
1630     /// use url::Url;
1631     /// # use url::ParseError;
1632     ///
1633     /// # fn run() -> Result<(), ParseError> {
1634     /// let mut url = Url::parse("mailto:rms@example.net")?;
1635     ///
1636     /// let result = url.set_host(Some("rust-lang.org"));
1637     /// assert!(result.is_err());
1638     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1639     ///
1640     /// let result = url.set_host(None);
1641     /// assert!(result.is_err());
1642     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1643     /// # Ok(())
1644     /// # }
1645     /// # run().unwrap();
1646     /// ```
1647     ///
1648     /// # Errors
1649     ///
1650     /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
1651     /// a [`ParseError`] variant will be returned.
1652     ///
1653     /// [`ParseError`]: enum.ParseError.html
set_host(&mut self, host: Option<&str>) -> Result<(), ParseError>1654     pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1655         if self.cannot_be_a_base() {
1656             return Err(ParseError::SetHostOnCannotBeABaseUrl);
1657         }
1658 
1659         if let Some(host) = host {
1660             if host.is_empty() && SchemeType::from(self.scheme()).is_special() {
1661                 return Err(ParseError::EmptyHost);
1662             }
1663             let mut host_substr = host;
1664             // Otherwise, if c is U+003A (:) and the [] flag is unset, then
1665             if !host.starts_with('[') || !host.ends_with(']') {
1666                 match host.find(':') {
1667                     Some(0) => {
1668                         // If buffer is the empty string, validation error, return failure.
1669                         return Err(ParseError::InvalidDomainCharacter);
1670                     }
1671                     // Let host be the result of host parsing buffer
1672                     Some(colon_index) => {
1673                         host_substr = &host[..colon_index];
1674                     }
1675                     None => {}
1676                 }
1677             }
1678             if SchemeType::from(self.scheme()).is_special() {
1679                 self.set_host_internal(Host::parse(host_substr)?, None);
1680             } else {
1681                 self.set_host_internal(Host::parse_opaque(host_substr)?, None);
1682             }
1683         } else if self.has_host() {
1684             let scheme_type = SchemeType::from(self.scheme());
1685             if scheme_type.is_special() {
1686                 return Err(ParseError::EmptyHost);
1687             } else if self.serialization.len() == self.path_start as usize {
1688                 self.serialization.push('/');
1689             }
1690             debug_assert!(self.byte_at(self.scheme_end) == b':');
1691             debug_assert!(self.byte_at(self.path_start) == b'/');
1692             let new_path_start = self.scheme_end + 1;
1693             self.serialization
1694                 .drain(new_path_start as usize..self.path_start as usize);
1695             let offset = self.path_start - new_path_start;
1696             self.path_start = new_path_start;
1697             self.username_end = new_path_start;
1698             self.host_start = new_path_start;
1699             self.host_end = new_path_start;
1700             self.port = None;
1701             if let Some(ref mut index) = self.query_start {
1702                 *index -= offset
1703             }
1704             if let Some(ref mut index) = self.fragment_start {
1705                 *index -= offset
1706             }
1707         }
1708         Ok(())
1709     }
1710 
1711     /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>)1712     fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
1713         let old_suffix_pos = if opt_new_port.is_some() {
1714             self.path_start
1715         } else {
1716             self.host_end
1717         };
1718         let suffix = self.slice(old_suffix_pos..).to_owned();
1719         self.serialization.truncate(self.host_start as usize);
1720         if !self.has_authority() {
1721             debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
1722             debug_assert!(self.username_end == self.host_start);
1723             self.serialization.push('/');
1724             self.serialization.push('/');
1725             self.username_end += 2;
1726             self.host_start += 2;
1727         }
1728         write!(&mut self.serialization, "{}", host).unwrap();
1729         self.host_end = to_u32(self.serialization.len()).unwrap();
1730         self.host = host.into();
1731 
1732         if let Some(new_port) = opt_new_port {
1733             self.port = new_port;
1734             if let Some(port) = new_port {
1735                 write!(&mut self.serialization, ":{}", port).unwrap();
1736             }
1737         }
1738         let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
1739         self.serialization.push_str(&suffix);
1740 
1741         let adjust = |index: &mut u32| {
1742             *index -= old_suffix_pos;
1743             *index += new_suffix_pos;
1744         };
1745         adjust(&mut self.path_start);
1746         if let Some(ref mut index) = self.query_start {
1747             adjust(index)
1748         }
1749         if let Some(ref mut index) = self.fragment_start {
1750             adjust(index)
1751         }
1752     }
1753 
1754     /// Change this URL’s host to the given IP address.
1755     ///
1756     /// If this URL is cannot-be-a-base, do nothing and return `Err`.
1757     ///
1758     /// Compared to `Url::set_host`, this skips the host parser.
1759     ///
1760     /// # Examples
1761     ///
1762     /// ```rust
1763     /// use url::{Url, ParseError};
1764     ///
1765     /// # fn run() -> Result<(), ParseError> {
1766     /// let mut url = Url::parse("http://example.com")?;
1767     /// url.set_ip_host("127.0.0.1".parse().unwrap());
1768     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1769     /// assert_eq!(url.as_str(), "http://127.0.0.1/");
1770     /// # Ok(())
1771     /// # }
1772     /// # run().unwrap();
1773     /// ```
1774     ///
    /// Cannot change the host of a `mailto` (cannot-be-a-base) URL to an IP address:
1776     ///
1777     /// ```rust
1778     /// use url::{Url, ParseError};
1779     ///
1780     /// # fn run() -> Result<(), ParseError> {
1781     /// let mut url = Url::parse("mailto:rms@example.com")?;
1782     /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
1783     ///
1784     /// assert_eq!(url.as_str(), "mailto:rms@example.com");
1785     /// assert!(result.is_err());
1786     /// # Ok(())
1787     /// # }
1788     /// # run().unwrap();
1789     /// ```
1790     ///
1791     #[allow(clippy::clippy::result_unit_err)]
set_ip_host(&mut self, address: IpAddr) -> Result<(), ()>1792     pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
1793         if self.cannot_be_a_base() {
1794             return Err(());
1795         }
1796 
1797         let address = match address {
1798             IpAddr::V4(address) => Host::Ipv4(address),
1799             IpAddr::V6(address) => Host::Ipv6(address),
1800         };
1801         self.set_host_internal(address, None);
1802         Ok(())
1803     }
1804 
1805     /// Change this URL’s password.
1806     ///
1807     /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
1808     ///
1809     /// # Examples
1810     ///
1811     /// ```rust
1812     /// use url::{Url, ParseError};
1813     ///
1814     /// # fn run() -> Result<(), ParseError> {
1815     /// let mut url = Url::parse("mailto:rmz@example.com")?;
1816     /// let result = url.set_password(Some("secret_password"));
1817     /// assert!(result.is_err());
1818     ///
1819     /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
1820     /// let result = url.set_password(Some("secret_password"));
1821     /// assert_eq!(url.password(), Some("secret_password"));
1822     ///
1823     /// let mut url = Url::parse("ftp://user2:@example.com")?;
1824     /// let result = url.set_password(Some("secret2"));
1825     /// assert!(result.is_ok());
1826     /// assert_eq!(url.password(), Some("secret2"));
1827     /// # Ok(())
1828     /// # }
1829     /// # run().unwrap();
1830     /// ```
1831     #[allow(clippy::clippy::result_unit_err)]
set_password(&mut self, password: Option<&str>) -> Result<(), ()>1832     pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
1833         // has_host implies !cannot_be_a_base
1834         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1835             return Err(());
1836         }
1837         if let Some(password) = password {
1838             let host_and_after = self.slice(self.host_start..).to_owned();
1839             self.serialization.truncate(self.username_end as usize);
1840             self.serialization.push(':');
1841             self.serialization
1842                 .extend(utf8_percent_encode(password, USERINFO));
1843             self.serialization.push('@');
1844 
1845             let old_host_start = self.host_start;
1846             let new_host_start = to_u32(self.serialization.len()).unwrap();
1847             let adjust = |index: &mut u32| {
1848                 *index -= old_host_start;
1849                 *index += new_host_start;
1850             };
1851             self.host_start = new_host_start;
1852             adjust(&mut self.host_end);
1853             adjust(&mut self.path_start);
1854             if let Some(ref mut index) = self.query_start {
1855                 adjust(index)
1856             }
1857             if let Some(ref mut index) = self.fragment_start {
1858                 adjust(index)
1859             }
1860 
1861             self.serialization.push_str(&host_and_after);
1862         } else if self.byte_at(self.username_end) == b':' {
1863             // If there is a password to remove
1864             let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
1865             debug_assert!(has_username_or_password);
1866             let username_start = self.scheme_end + 3;
1867             let empty_username = username_start == self.username_end;
1868             let start = self.username_end; // Remove the ':'
1869             let end = if empty_username {
1870                 self.host_start // Remove the '@' as well
1871             } else {
1872                 self.host_start - 1 // Keep the '@' to separate the username from the host
1873             };
1874             self.serialization.drain(start as usize..end as usize);
1875             let offset = end - start;
1876             self.host_start -= offset;
1877             self.host_end -= offset;
1878             self.path_start -= offset;
1879             if let Some(ref mut index) = self.query_start {
1880                 *index -= offset
1881             }
1882             if let Some(ref mut index) = self.fragment_start {
1883                 *index -= offset
1884             }
1885         }
1886         Ok(())
1887     }
1888 
1889     /// Change this URL’s username.
1890     ///
    /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
    ///
    /// # Examples
    ///
    /// Cannot set a username on a `mailto` (cannot-be-a-base) URL:
1895     ///
1896     /// ```rust
1897     /// use url::{Url, ParseError};
1898     ///
1899     /// # fn run() -> Result<(), ParseError> {
1900     /// let mut url = Url::parse("mailto:rmz@example.com")?;
1901     /// let result = url.set_username("user1");
1902     /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
1903     /// assert!(result.is_err());
1904     /// # Ok(())
1905     /// # }
1906     /// # run().unwrap();
1907     /// ```
1908     ///
    /// Set the username to `user1`:
1910     ///
1911     /// ```rust
1912     /// use url::{Url, ParseError};
1913     ///
1914     /// # fn run() -> Result<(), ParseError> {
1915     /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
1916     /// let result = url.set_username("user1");
1917     /// assert!(result.is_ok());
1918     /// assert_eq!(url.username(), "user1");
1919     /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
1920     /// # Ok(())
1921     /// # }
1922     /// # run().unwrap();
1923     /// ```
1924     #[allow(clippy::clippy::result_unit_err)]
set_username(&mut self, username: &str) -> Result<(), ()>1925     pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
1926         // has_host implies !cannot_be_a_base
1927         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1928             return Err(());
1929         }
1930         let username_start = self.scheme_end + 3;
1931         debug_assert!(self.slice(self.scheme_end..username_start) == "://");
1932         if self.slice(username_start..self.username_end) == username {
1933             return Ok(());
1934         }
1935         let after_username = self.slice(self.username_end..).to_owned();
1936         self.serialization.truncate(username_start as usize);
1937         self.serialization
1938             .extend(utf8_percent_encode(username, USERINFO));
1939 
1940         let mut removed_bytes = self.username_end;
1941         self.username_end = to_u32(self.serialization.len()).unwrap();
1942         let mut added_bytes = self.username_end;
1943 
1944         let new_username_is_empty = self.username_end == username_start;
1945         match (new_username_is_empty, after_username.chars().next()) {
1946             (true, Some('@')) => {
1947                 removed_bytes += 1;
1948                 self.serialization.push_str(&after_username[1..]);
1949             }
1950             (false, Some('@')) | (_, Some(':')) | (true, _) => {
1951                 self.serialization.push_str(&after_username);
1952             }
1953             (false, _) => {
1954                 added_bytes += 1;
1955                 self.serialization.push('@');
1956                 self.serialization.push_str(&after_username);
1957             }
1958         }
1959 
1960         let adjust = |index: &mut u32| {
1961             *index -= removed_bytes;
1962             *index += added_bytes;
1963         };
1964         adjust(&mut self.host_start);
1965         adjust(&mut self.host_end);
1966         adjust(&mut self.path_start);
1967         if let Some(ref mut index) = self.query_start {
1968             adjust(index)
1969         }
1970         if let Some(ref mut index) = self.fragment_start {
1971             adjust(index)
1972         }
1973         Ok(())
1974     }
1975 
1976     /// Change this URL’s scheme.
1977     ///
1978     /// Do nothing and return `Err` under the following circumstances:
1979     ///
    /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]*`
1981     /// * If this URL is cannot-be-a-base and the new scheme is one of
1982     ///   `http`, `https`, `ws`, `wss` or `ftp`
1983     /// * If either the old or new scheme is `http`, `https`, `ws`,
1984     ///   `wss` or `ftp` and the other is not one of these
1985     /// * If the new scheme is `file` and this URL includes credentials
1986     ///   or has a non-null port
1987     /// * If this URL's scheme is `file` and its host is empty or null
1988     ///
1989     /// See also [the URL specification's section on legal scheme state
1990     /// overrides](https://url.spec.whatwg.org/#scheme-state).
1991     ///
1992     /// # Examples
1993     ///
    /// Change the URL’s scheme from `https` to `http`:
1995     ///
1996     /// ```
1997     /// use url::Url;
1998     /// # use url::ParseError;
1999     ///
2000     /// # fn run() -> Result<(), ParseError> {
2001     /// let mut url = Url::parse("https://example.net")?;
2002     /// let result = url.set_scheme("http");
2003     /// assert_eq!(url.as_str(), "http://example.net/");
2004     /// assert!(result.is_ok());
2005     /// # Ok(())
2006     /// # }
2007     /// # run().unwrap();
2008     /// ```
2009     /// Change the URL’s scheme from `foo` to `bar`:
2010     ///
2011     /// ```
2012     /// use url::Url;
2013     /// # use url::ParseError;
2014     ///
2015     /// # fn run() -> Result<(), ParseError> {
2016     /// let mut url = Url::parse("foo://example.net")?;
2017     /// let result = url.set_scheme("bar");
2018     /// assert_eq!(url.as_str(), "bar://example.net");
2019     /// assert!(result.is_ok());
2020     /// # Ok(())
2021     /// # }
2022     /// # run().unwrap();
2023     /// ```
2024     ///
2025     /// Cannot change URL’s scheme from `https` to `foõ`:
2026     ///
2027     /// ```
2028     /// use url::Url;
2029     /// # use url::ParseError;
2030     ///
2031     /// # fn run() -> Result<(), ParseError> {
2032     /// let mut url = Url::parse("https://example.net")?;
2033     /// let result = url.set_scheme("foõ");
2034     /// assert_eq!(url.as_str(), "https://example.net/");
2035     /// assert!(result.is_err());
2036     /// # Ok(())
2037     /// # }
2038     /// # run().unwrap();
2039     /// ```
2040     ///
2041     /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2042     ///
2043     /// ```
2044     /// use url::Url;
2045     /// # use url::ParseError;
2046     ///
2047     /// # fn run() -> Result<(), ParseError> {
2048     /// let mut url = Url::parse("mailto:rms@example.net")?;
2049     /// let result = url.set_scheme("https");
2050     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2051     /// assert!(result.is_err());
2052     /// # Ok(())
2053     /// # }
2054     /// # run().unwrap();
2055     /// ```
2056     /// Cannot change the URL’s scheme from `foo` to `https`:
2057     ///
2058     /// ```
2059     /// use url::Url;
2060     /// # use url::ParseError;
2061     ///
2062     /// # fn run() -> Result<(), ParseError> {
2063     /// let mut url = Url::parse("foo://example.net")?;
2064     /// let result = url.set_scheme("https");
2065     /// assert_eq!(url.as_str(), "foo://example.net");
2066     /// assert!(result.is_err());
2067     /// # Ok(())
2068     /// # }
2069     /// # run().unwrap();
2070     /// ```
2071     /// Cannot change the URL’s scheme from `http` to `foo`:
2072     ///
2073     /// ```
2074     /// use url::Url;
2075     /// # use url::ParseError;
2076     ///
2077     /// # fn run() -> Result<(), ParseError> {
2078     /// let mut url = Url::parse("http://example.net")?;
2079     /// let result = url.set_scheme("foo");
2080     /// assert_eq!(url.as_str(), "http://example.net/");
2081     /// assert!(result.is_err());
2082     /// # Ok(())
2083     /// # }
2084     /// # run().unwrap();
2085     /// ```
2086     #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
set_scheme(&mut self, scheme: &str) -> Result<(), ()>2087     pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2088         let mut parser = Parser::for_setter(String::new());
2089         let remaining = parser.parse_scheme(parser::Input::new(scheme))?;
2090         let new_scheme_type = SchemeType::from(&parser.serialization);
2091         let old_scheme_type = SchemeType::from(self.scheme());
2092         // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2093         if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2094             // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2095             (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2096             // If url includes credentials or has a non-null port, and buffer is "file", then return.
2097             // If url’s scheme is "file" and its host is an empty host or null, then return.
2098             (new_scheme_type.is_file() && self.has_authority())
2099         {
2100             return Err(());
2101         }
2102 
2103         if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2104             return Err(());
2105         }
2106         let old_scheme_end = self.scheme_end;
2107         let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2108         let adjust = |index: &mut u32| {
2109             *index -= old_scheme_end;
2110             *index += new_scheme_end;
2111         };
2112 
2113         self.scheme_end = new_scheme_end;
2114         adjust(&mut self.username_end);
2115         adjust(&mut self.host_start);
2116         adjust(&mut self.host_end);
2117         adjust(&mut self.path_start);
2118         if let Some(ref mut index) = self.query_start {
2119             adjust(index)
2120         }
2121         if let Some(ref mut index) = self.fragment_start {
2122             adjust(index)
2123         }
2124 
2125         parser.serialization.push_str(self.slice(old_scheme_end..));
2126         self.serialization = parser.serialization;
2127 
2128         // Update the port so it can be removed
2129         // If it is the scheme's default
2130         // we don't mind it silently failing
2131         // if there was no port in the first place
2132         let previous_port = self.port();
2133         let _ = self.set_port(previous_port);
2134 
2135         Ok(())
2136     }
2137 
2138     /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2139     ///
2140     /// This returns `Err` if the given path is not absolute or,
2141     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2142     ///
2143     /// # Examples
2144     ///
2145     /// On Unix-like platforms:
2146     ///
2147     /// ```
2148     /// # if cfg!(unix) {
2149     /// use url::Url;
2150     ///
2151     /// # fn run() -> Result<(), ()> {
2152     /// let url = Url::from_file_path("/tmp/foo.txt")?;
2153     /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2154     ///
2155     /// let url = Url::from_file_path("../foo.txt");
2156     /// assert!(url.is_err());
2157     ///
2158     /// let url = Url::from_file_path("https://google.com/");
2159     /// assert!(url.is_err());
2160     /// # Ok(())
2161     /// # }
2162     /// # run().unwrap();
2163     /// # }
2164     /// ```
2165     #[cfg(any(unix, windows, target_os = "redox"))]
2166     #[allow(clippy::clippy::result_unit_err)]
from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2167     pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2168         let mut serialization = "file://".to_owned();
2169         let host_start = serialization.len() as u32;
2170         let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2171         Ok(Url {
2172             serialization,
2173             scheme_end: "file".len() as u32,
2174             username_end: host_start,
2175             host_start,
2176             host_end,
2177             host,
2178             port: None,
2179             path_start: host_end,
2180             query_start: None,
2181             fragment_start: None,
2182         })
2183     }
2184 
2185     /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2186     ///
2187     /// This returns `Err` if the given path is not absolute or,
2188     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2189     ///
    /// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
2191     /// so that the entire path is considered when using this URL as a base URL.
2192     ///
2193     /// For example:
2194     ///
2195     /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2196     ///   as the base URL is `file:///var/www/index.html`
2197     /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2198     ///   as the base URL is `file:///var/index.html`, which might not be what was intended.
2199     ///
2200     /// Note that `std::path` does not consider trailing slashes significant
2201     /// and usually does not include them (e.g. in `Path::parent()`).
2202     #[cfg(any(unix, windows, target_os = "redox"))]
2203     #[allow(clippy::clippy::result_unit_err)]
from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2204     pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2205         let mut url = Url::from_file_path(path)?;
2206         if !url.serialization.ends_with('/') {
2207             url.serialization.push('/')
2208         }
2209         Ok(url)
2210     }
2211 
2212     /// Serialize with Serde using the internal representation of the `Url` struct.
2213     ///
2214     /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2215     /// for speed, compared to the `Deserialize` trait impl.
2216     ///
2217     /// This method is only available if the `serde` Cargo feature is enabled.
2218     #[cfg(feature = "serde")]
2219     #[deny(unused)]
serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer,2220     pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2221     where
2222         S: serde::Serializer,
2223     {
2224         use serde::Serialize;
2225         // Destructuring first lets us ensure that adding or removing fields forces this method
2226         // to be updated
2227         let Url {
2228             ref serialization,
2229             ref scheme_end,
2230             ref username_end,
2231             ref host_start,
2232             ref host_end,
2233             ref host,
2234             ref port,
2235             ref path_start,
2236             ref query_start,
2237             ref fragment_start,
2238         } = *self;
2239         (
2240             serialization,
2241             scheme_end,
2242             username_end,
2243             host_start,
2244             host_end,
2245             host,
2246             port,
2247             path_start,
2248             query_start,
2249             fragment_start,
2250         )
2251             .serialize(serializer)
2252     }
2253 
    /// Deserialize with Serde using the internal representation of the `Url` struct.
    ///
    /// This method sacrifices some invariant-checking for speed,
    /// compared to the `Deserialize` trait impl.
2258     ///
2259     /// This method is only available if the `serde` Cargo feature is enabled.
2260     #[cfg(feature = "serde")]
2261     #[deny(unused)]
deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error> where D: serde::Deserializer<'de>,2262     pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
2263     where
2264         D: serde::Deserializer<'de>,
2265     {
2266         use serde::de::{Deserialize, Error, Unexpected};
2267         let (
2268             serialization,
2269             scheme_end,
2270             username_end,
2271             host_start,
2272             host_end,
2273             host,
2274             port,
2275             path_start,
2276             query_start,
2277             fragment_start,
2278         ) = Deserialize::deserialize(deserializer)?;
2279         let url = Url {
2280             serialization,
2281             scheme_end,
2282             username_end,
2283             host_start,
2284             host_end,
2285             host,
2286             port,
2287             path_start,
2288             query_start,
2289             fragment_start,
2290         };
2291         if cfg!(debug_assertions) {
2292             url.check_invariants().map_err(|reason| {
2293                 let reason: &str = &reason;
2294                 Error::invalid_value(Unexpected::Other("value"), &reason)
2295             })?
2296         }
2297         Ok(url)
2298     }
2299 
2300     /// Assuming the URL is in the `file` scheme or similar,
2301     /// convert its path to an absolute `std::path::Path`.
2302     ///
2303     /// **Note:** This does not actually check the URL’s `scheme`,
2304     /// and may give nonsensical results for other schemes.
2305     /// It is the user’s responsibility to check the URL’s scheme before calling this.
2306     ///
2307     /// ```
2308     /// # use url::Url;
2309     /// # let url = Url::parse("file:///etc/passwd").unwrap();
2310     /// let path = url.to_file_path();
2311     /// ```
2312     ///
2313     /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2314     /// `file:` URLs may have a non-local host),
2315     /// or if `Path::new_opt()` returns `None`.
2316     /// (That is, if the percent-decoded path contains a NUL byte or,
2317     /// for a Windows path, is not UTF-8.)
2318     #[inline]
2319     #[cfg(any(unix, windows, target_os = "redox"))]
2320     #[allow(clippy::clippy::result_unit_err)]
to_file_path(&self) -> Result<PathBuf, ()>2321     pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2322         if let Some(segments) = self.path_segments() {
2323             let host = match self.host() {
2324                 None | Some(Host::Domain("localhost")) => None,
2325                 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2326                     Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2327                 }
2328                 _ => return Err(()),
2329             };
2330 
2331             return file_url_segments_to_pathbuf(host, segments);
2332         }
2333         Err(())
2334     }
2335 
2336     // Private helper methods:
2337 
2338     #[inline]
slice<R>(&self, range: R) -> &str where R: RangeArg,2339     fn slice<R>(&self, range: R) -> &str
2340     where
2341         R: RangeArg,
2342     {
2343         range.slice_of(&self.serialization)
2344     }
2345 
2346     #[inline]
byte_at(&self, i: u32) -> u82347     fn byte_at(&self, i: u32) -> u8 {
2348         self.serialization.as_bytes()[i as usize]
2349     }
2350 }
2351 
2352 /// Parse a string as an URL, without a base URL or encoding override.
2353 impl str::FromStr for Url {
2354     type Err = ParseError;
2355 
2356     #[inline]
from_str(input: &str) -> Result<Url, crate::ParseError>2357     fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2358         Url::parse(input)
2359     }
2360 }
2361 
2362 impl<'a> TryFrom<&'a str> for Url {
2363     type Error = ParseError;
2364 
try_from(s: &'a str) -> Result<Self, Self::Error>2365     fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2366         Url::parse(s)
2367     }
2368 }
2369 
2370 /// Display the serialization of this URL.
2371 impl fmt::Display for Url {
2372     #[inline]
fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result2373     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2374         fmt::Display::fmt(&self.serialization, formatter)
2375     }
2376 }
2377 
2378 /// Debug the serialization of this URL.
2379 impl fmt::Debug for Url {
2380     #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2381     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2382         formatter
2383             .debug_struct("Url")
2384             .field("scheme", &self.scheme())
2385             .field("username", &self.username())
2386             .field("password", &self.password())
2387             .field("host", &self.host())
2388             .field("port", &self.port())
2389             .field("path", &self.path())
2390             .field("query", &self.query())
2391             .field("fragment", &self.fragment())
2392             .finish()
2393     }
2394 }
2395 
/// URLs compare like their serialization.
///
/// This is a marker impl with no methods of its own; the comparison itself is
/// provided by the `PartialEq` impl for `Url`.
impl Eq for Url {}
2398 
2399 /// URLs compare like their serialization.
2400 impl PartialEq for Url {
2401     #[inline]
eq(&self, other: &Self) -> bool2402     fn eq(&self, other: &Self) -> bool {
2403         self.serialization == other.serialization
2404     }
2405 }
2406 
2407 /// URLs compare like their serialization.
2408 impl Ord for Url {
2409     #[inline]
cmp(&self, other: &Self) -> cmp::Ordering2410     fn cmp(&self, other: &Self) -> cmp::Ordering {
2411         self.serialization.cmp(&other.serialization)
2412     }
2413 }
2414 
2415 /// URLs compare like their serialization.
2416 impl PartialOrd for Url {
2417     #[inline]
partial_cmp(&self, other: &Self) -> Option<cmp::Ordering>2418     fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2419         self.serialization.partial_cmp(&other.serialization)
2420     }
2421 }
2422 
2423 /// URLs hash like their serialization.
2424 impl hash::Hash for Url {
2425     #[inline]
hash<H>(&self, state: &mut H) where H: hash::Hasher,2426     fn hash<H>(&self, state: &mut H)
2427     where
2428         H: hash::Hasher,
2429     {
2430         hash::Hash::hash(&self.serialization, state)
2431     }
2432 }
2433 
2434 /// Return the serialization of this URL.
2435 impl AsRef<str> for Url {
2436     #[inline]
as_ref(&self) -> &str2437     fn as_ref(&self) -> &str {
2438         &self.serialization
2439     }
2440 }
2441 
/// Private helper for slicing a string with a range.
///
/// It is implemented in this file for `Range<u32>`, `RangeFrom<u32>` and
/// `RangeTo<u32>`.
trait RangeArg {
    /// Returns the part of `s` selected by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}
2445 
2446 impl RangeArg for Range<u32> {
2447     #[inline]
slice_of<'a>(&self, s: &'a str) -> &'a str2448     fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2449         &s[self.start as usize..self.end as usize]
2450     }
2451 }
2452 
2453 impl RangeArg for RangeFrom<u32> {
2454     #[inline]
slice_of<'a>(&self, s: &'a str) -> &'a str2455     fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2456         &s[self.start as usize..]
2457     }
2458 }
2459 
2460 impl RangeArg for RangeTo<u32> {
2461     #[inline]
slice_of<'a>(&self, s: &'a str) -> &'a str2462     fn slice_of<'a>(&self, s: &'a str) -> &'a str {
2463         &s[..self.end as usize]
2464     }
2465 }
2466 
/// Serializes this URL into a `serde` stream as a single string, the value
/// returned by `Url::as_str`.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let serialized: &str = self.as_str();
        serializer.serialize_str(serialized)
    }
}
2479 
/// Deserializes this URL from a `serde` stream.
///
/// The input must be a string; it is parsed with `Url::parse`. A string that
/// fails to parse produces a custom deserialization error of the form
/// `<parse error>: "<input>"`.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Visitor};

        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                // `Error::invalid_value` renders its second argument after the word
                // "expected", which turned a parse failure into nonsense such as
                // "expected invalid IPv6 address". Report the parse error itself,
                // followed by the offending input.
                Url::parse(s).map_err(|err| Error::custom(format!("{}: {:?}", err, s)))
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2514 
2515 #[cfg(any(unix, target_os = "redox"))]
path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2516 fn path_to_file_url_segments(
2517     path: &Path,
2518     serialization: &mut String,
2519 ) -> Result<(u32, HostInternal), ()> {
2520     use std::os::unix::prelude::OsStrExt;
2521     if !path.is_absolute() {
2522         return Err(());
2523     }
2524     let host_end = to_u32(serialization.len()).unwrap();
2525     let mut empty = true;
2526     // skip the root component
2527     for component in path.components().skip(1) {
2528         empty = false;
2529         serialization.push('/');
2530         serialization.extend(percent_encode(
2531             component.as_os_str().as_bytes(),
2532             PATH_SEGMENT,
2533         ));
2534     }
2535     if empty {
2536         // An URL’s path must not be empty.
2537         serialization.push('/');
2538     }
2539     Ok((host_end, HostInternal::None))
2540 }
2541 
/// Windows build of `path_to_file_url_segments`.
///
/// Forwards both arguments unchanged to `path_to_file_url_segments_windows`
/// and returns its result.
#[cfg(windows)]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2549 
2550 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2551 #[cfg_attr(not(windows), allow(dead_code))]
path_to_file_url_segments_windows( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()>2552 fn path_to_file_url_segments_windows(
2553     path: &Path,
2554     serialization: &mut String,
2555 ) -> Result<(u32, HostInternal), ()> {
2556     use std::path::{Component, Prefix};
2557     if !path.is_absolute() {
2558         return Err(());
2559     }
2560     let mut components = path.components();
2561 
2562     let host_start = serialization.len() + 1;
2563     let host_end;
2564     let host_internal;
2565     match components.next() {
2566         Some(Component::Prefix(ref p)) => match p.kind() {
2567             Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2568                 host_end = to_u32(serialization.len()).unwrap();
2569                 host_internal = HostInternal::None;
2570                 serialization.push('/');
2571                 serialization.push(letter as char);
2572                 serialization.push(':');
2573             }
2574             Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2575                 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2576                 write!(serialization, "{}", host).unwrap();
2577                 host_end = to_u32(serialization.len()).unwrap();
2578                 host_internal = host.into();
2579                 serialization.push('/');
2580                 let share = share.to_str().ok_or(())?;
2581                 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
2582             }
2583             _ => return Err(()),
2584         },
2585 
2586         _ => return Err(()),
2587     }
2588 
2589     let mut path_only_has_prefix = true;
2590     for component in components {
2591         if component == Component::RootDir {
2592             continue;
2593         }
2594         path_only_has_prefix = false;
2595         // FIXME: somehow work with non-unicode?
2596         let component = component.as_os_str().to_str().ok_or(())?;
2597         serialization.push('/');
2598         serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
2599     }
2600     // A windows drive letter must end with a slash.
2601     if serialization.len() > host_start
2602         && parser::is_windows_drive_letter(&serialization[host_start..])
2603         && path_only_has_prefix
2604     {
2605         serialization.push('/');
2606     }
2607     Ok((host_end, host_internal))
2608 }
2609 
2610 #[cfg(any(unix, target_os = "redox"))]
file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2611 fn file_url_segments_to_pathbuf(
2612     host: Option<&str>,
2613     segments: str::Split<'_, char>,
2614 ) -> Result<PathBuf, ()> {
2615     use std::ffi::OsStr;
2616     use std::os::unix::prelude::OsStrExt;
2617 
2618     if host.is_some() {
2619         return Err(());
2620     }
2621 
2622     let mut bytes = if cfg!(target_os = "redox") {
2623         b"file:".to_vec()
2624     } else {
2625         Vec::new()
2626     };
2627     for segment in segments {
2628         bytes.push(b'/');
2629         bytes.extend(percent_decode(segment.as_bytes()));
2630     }
2631     // A windows drive letter must end with a slash.
2632     if bytes.len() > 2
2633         && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
2634         && matches!(bytes[bytes.len() - 1], b':' | b'|')
2635     {
2636         bytes.push(b'/');
2637     }
2638     let os_str = OsStr::from_bytes(&bytes);
2639     let path = PathBuf::from(os_str);
2640     debug_assert!(
2641         path.is_absolute(),
2642         "to_file_path() failed to produce an absolute Path"
2643     );
2644     Ok(path)
2645 }
2646 
/// Windows build of `file_url_segments_to_pathbuf`.
///
/// Forwards both arguments unchanged to `file_url_segments_to_pathbuf_windows`
/// and returns its result.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    // The lifetime is written as `'_` to match the sibling signatures.
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2654 
2655 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2656 #[cfg_attr(not(windows), allow(dead_code))]
file_url_segments_to_pathbuf_windows( host: Option<&str>, mut segments: str::Split<'_, char>, ) -> Result<PathBuf, ()>2657 fn file_url_segments_to_pathbuf_windows(
2658     host: Option<&str>,
2659     mut segments: str::Split<'_, char>,
2660 ) -> Result<PathBuf, ()> {
2661     let mut string = if let Some(host) = host {
2662         r"\\".to_owned() + host
2663     } else {
2664         let first = segments.next().ok_or(())?;
2665 
2666         match first.len() {
2667             2 => {
2668                 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2669                     return Err(());
2670                 }
2671 
2672                 first.to_owned()
2673             }
2674 
2675             4 => {
2676                 if !first.starts_with(parser::ascii_alpha) {
2677                     return Err(());
2678                 }
2679                 let bytes = first.as_bytes();
2680                 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
2681                     return Err(());
2682                 }
2683 
2684                 first[0..1].to_owned() + ":"
2685             }
2686 
2687             _ => return Err(()),
2688         }
2689     };
2690 
2691     for segment in segments {
2692         string.push('\\');
2693 
2694         // Currently non-unicode windows paths cannot be represented
2695         match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
2696             Ok(s) => string.push_str(&s),
2697             Err(..) => return Err(()),
2698         }
2699     }
2700     let path = PathBuf::from(string);
2701     debug_assert!(
2702         path.is_absolute(),
2703         "to_file_path() failed to produce an absolute Path"
2704     );
2705     Ok(path)
2706 }
2707 
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. `Target::finish` and `Drop` take it out,
    // leaving `None` behind.
    url: Option<&'a mut Url>,
    // Handed to `Url::restore_already_parsed_fragment` when editing ends.
    // Presumably the fragment set aside while the query is rewritten;
    // confirm against `Url::query_pairs_mut`.
    fragment: Option<String>,
}
2714 
2715 // `as_mut_string` string here exposes the internal serialization of an `Url`,
2716 // which should not be exposed to users.
2717 // We achieve that by not giving users direct access to `UrlQuery`:
2718 // * Its fields are private
2719 //   (and so can not be constructed with struct literal syntax outside of this crate),
2720 // * It has no constructor
2721 // * It is only visible (on the type level) to users in the return type of
2722 //   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
2723 // * `Serializer` keeps its target in a private field
2724 // * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
2725 impl<'a> form_urlencoded::Target for UrlQuery<'a> {
as_mut_string(&mut self) -> &mut String2726     fn as_mut_string(&mut self) -> &mut String {
2727         &mut self.url.as_mut().unwrap().serialization
2728     }
2729 
finish(mut self) -> &'a mut Url2730     fn finish(mut self) -> &'a mut Url {
2731         let url = self.url.take().unwrap();
2732         url.restore_already_parsed_fragment(self.fragment.take());
2733         url
2734     }
2735 
2736     type Finished = &'a mut Url;
2737 }
2738 
2739 impl<'a> Drop for UrlQuery<'a> {
drop(&mut self)2740     fn drop(&mut self) {
2741         if let Some(url) = self.url.take() {
2742             url.restore_already_parsed_fragment(self.fragment.take())
2743         }
2744     }
2745 }
2746