1 // Copyright 2013-2015 The rust-url developers.
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 /*!
10 
11 rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12 for the [Rust](http://rust-lang.org/) programming language.
13 
14 
15 # URL parsing and data structures
16 
17 First, URL parsing may fail for various reasons and therefore returns a `Result`.
18 
19 ```
20 use url::{Url, ParseError};
21 
22 assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23 ```
24 
25 Let’s parse a valid URL and look at its components.
26 
27 ```
28 use url::{Url, Host};
29 # use url::ParseError;
30 # fn run() -> Result<(), ParseError> {
31 let issue_list_url = Url::parse(
32     "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33 )?;
34 
35 
36 assert!(issue_list_url.scheme() == "https");
37 assert!(issue_list_url.username() == "");
38 assert!(issue_list_url.password() == None);
39 assert!(issue_list_url.host_str() == Some("github.com"));
40 assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41 assert!(issue_list_url.port() == None);
42 assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43 assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44         Some(vec!["rust-lang", "rust", "issues"]));
45 assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46 assert!(issue_list_url.fragment() == None);
47 assert!(!issue_list_url.cannot_be_a_base());
48 # Ok(())
49 # }
50 # run().unwrap();
51 ```
52 
53 Some URLs are said to be *cannot-be-a-base*:
54 they don’t have a username, password, host, or port,
55 and their "path" is an arbitrary string rather than slash-separated segments:
56 
57 ```
58 use url::Url;
59 # use url::ParseError;
60 
61 # fn run() -> Result<(), ParseError> {
62 let data_url = Url::parse("data:text/plain,Hello?World#")?;
63 
64 assert!(data_url.cannot_be_a_base());
65 assert!(data_url.scheme() == "data");
66 assert!(data_url.path() == "text/plain,Hello");
67 assert!(data_url.path_segments().is_none());
68 assert!(data_url.query() == Some("World"));
69 assert!(data_url.fragment() == Some(""));
70 # Ok(())
71 # }
72 # run().unwrap();
73 ```
74 
75 
76 # Base URL
77 
78 Many contexts allow URL *references* that can be relative to a *base URL*:
79 
80 ```html
81 <link rel="stylesheet" href="../main.css">
82 ```
83 
Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
85 
86 ```
87 use url::{Url, ParseError};
88 
89 assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
90 ```
91 
92 Use the `join` method on an `Url` to use it as a base URL:
93 
94 ```
95 use url::Url;
96 # use url::ParseError;
97 
98 # fn run() -> Result<(), ParseError> {
99 let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
100 let css_url = this_document.join("../main.css")?;
101 assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
102 # Ok(())
103 # }
# run().unwrap();
```
105 */
106 
107 #![doc(html_root_url = "https://docs.rs/url/1.7.0")]
108 
109 #[cfg(feature="rustc-serialize")] extern crate rustc_serialize;
110 #[macro_use] extern crate matches;
111 #[cfg(feature="serde")] extern crate serde;
112 #[cfg(feature="heapsize")] #[macro_use] extern crate heapsize;
113 
114 pub extern crate idna;
115 #[macro_use]
116 pub extern crate percent_encoding;
117 
118 use encoding::EncodingOverride;
119 #[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
120 use host::HostInternal;
121 use parser::{Parser, Context, SchemeType, to_u32, ViolationFn};
122 use percent_encoding::{PATH_SEGMENT_ENCODE_SET, USERINFO_ENCODE_SET,
123                        percent_encode, percent_decode, utf8_percent_encode};
124 use std::borrow::Borrow;
125 use std::cmp;
126 #[cfg(feature = "serde")] use std::error::Error;
127 use std::fmt::{self, Write, Debug, Formatter};
128 use std::hash;
129 use std::io;
130 use std::mem;
131 use std::net::{ToSocketAddrs, IpAddr};
132 use std::ops::{Range, RangeFrom, RangeTo};
133 use std::path::{Path, PathBuf};
134 use std::str;
135 
136 pub use origin::{Origin, OpaqueOrigin};
137 pub use host::{Host, HostAndPort, SocketAddrs};
138 pub use path_segments::PathSegmentsMut;
139 pub use parser::{ParseError, SyntaxViolation};
140 pub use slicing::Position;
141 
142 mod encoding;
143 mod host;
144 mod origin;
145 mod path_segments;
146 mod parser;
147 mod slicing;
148 
149 pub mod form_urlencoded;
150 #[doc(hidden)] pub mod quirks;
151 
/// A parsed URL record.
///
/// The URL is stored as one serialized string; the other fields record where
/// the individual components begin or end, so accessors can return sub-slices
/// instead of separately allocated strings.
#[derive(Clone)]
pub struct Url {
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    //
    // The `u32` fields below are positions used with the `slice` / `byte_at`
    // helpers; `check_invariants` spells out how they relate to each other.
    scheme_end: u32,  // Before ':'
    username_end: u32,  // Before ':' (if a password is given) or '@' (if not)
    // The host text is read back with `slice(host_start..host_end)`.
    host_start: u32,
    host_end: u32,
    // Kind of host. The `Domain` variant carries no data of its own: the domain
    // text is taken from the `host_start..host_end` range.
    host: HostInternal,
    // Explicit port number, if one is present in the serialization.
    port: Option<u16>,
    path_start: u32,  // Before initial '/', if any
    query_start: Option<u32>,  // Before '?', unlike Position::QueryStart
    fragment_start: Option<u32>,  // Before '#', unlike Position::FragmentStart
}
177 
// Only compiled when the `heapsize` feature is enabled.
#[cfg(feature = "heapsize")]
impl HeapSizeOf for Url {
    /// Report the heap usage of this URL.
    ///
    /// Only the `serialization` string is measured; no other field is consulted.
    fn heap_size_of_children(&self) -> usize {
        self.serialization.heap_size_of_children()
    }
}
184 
/// Full configuration for the URL parser.
///
/// A default value is obtained from `Url::options()`, adjusted with the
/// by-value setter methods of this type, and finally consumed by `parse`.
#[derive(Copy, Clone)]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; `None` when no base is configured.
    base_url: Option<&'a Url>,
    // Query-string encoding override, handed to the parser as `query_encoding_override`.
    encoding_override: encoding::EncodingOverride,
    // Wrapper around the callback for non-fatal syntax violations (`NoOp` when unset).
    violation_fn: ViolationFn<'a>,
}
192 
193 impl<'a> ParseOptions<'a> {
194     /// Change the base URL
base_url(mut self, new: Option<&'a Url>) -> Self195     pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
196         self.base_url = new;
197         self
198     }
199 
200     /// Override the character encoding of query strings.
201     /// This is a legacy concept only relevant for HTML.
202     ///
203     /// `EncodingRef` is defined in [rust-encoding](https://github.com/lifthrasiir/rust-encoding).
204     ///
205     /// This method is only available if the `query_encoding`
206     /// [feature](http://doc.crates.io/manifest.html#the-features-section]) is enabled.
207     #[cfg(feature = "query_encoding")]
encoding_override(mut self, new: Option<encoding::EncodingRef>) -> Self208     pub fn encoding_override(mut self, new: Option<encoding::EncodingRef>) -> Self {
209         self.encoding_override = EncodingOverride::from_opt_encoding(new).to_output_encoding();
210         self
211     }
212 
213     /// Call the provided function or closure on non-fatal parse errors, passing
214     /// a static string description.  This method is deprecated in favor of
215     /// `syntax_violation_callback` and is implemented as an adaptor for the
216     /// latter, passing the `SyntaxViolation` description. Only the last value
217     /// passed to either method will be used by a parser.
218     #[deprecated]
log_syntax_violation(mut self, new: Option<&'a Fn(&'static str)>) -> Self219     pub fn log_syntax_violation(mut self, new: Option<&'a Fn(&'static str)>) -> Self {
220         self.violation_fn = match new {
221             Some(f) => ViolationFn::OldFn(f),
222             None => ViolationFn::NoOp
223         };
224         self
225     }
226 
227     /// Call the provided function or closure for a non-fatal `SyntaxViolation`
228     /// when it occurs during parsing. Note that since the provided function is
229     /// `Fn`, the caller might need to utilize _interior mutability_, such as with
230     /// a `RefCell`, to collect the violations.
231     ///
232     /// ## Example
233     /// ```
234     /// use std::cell::RefCell;
235     /// use url::{Url, SyntaxViolation};
236     /// # use url::ParseError;
237     /// # fn run() -> Result<(), url::ParseError> {
238     /// let violations = RefCell::new(Vec::new());
239     /// let url = Url::options()
240     ///     .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
241     ///     .parse("https:////example.com")?;
242     /// assert_eq!(url.as_str(), "https://example.com/");
243     /// assert_eq!(violations.into_inner(),
244     ///            vec!(SyntaxViolation::ExpectedDoubleSlash));
245     /// # Ok(())
246     /// # }
247     /// # run().unwrap();
248     /// ```
syntax_violation_callback(mut self, new: Option<&'a Fn(SyntaxViolation)>) -> Self249     pub fn syntax_violation_callback(mut self, new: Option<&'a Fn(SyntaxViolation)>) -> Self {
250         self.violation_fn = match new {
251             Some(f) => ViolationFn::NewFn(f),
252             None => ViolationFn::NoOp
253         };
254         self
255     }
256 
257     /// Parse an URL string with the configuration so far.
parse(self, input: &str) -> Result<Url, ::ParseError>258     pub fn parse(self, input: &str) -> Result<Url, ::ParseError> {
259         Parser {
260             serialization: String::with_capacity(input.len()),
261             base_url: self.base_url,
262             query_encoding_override: self.encoding_override,
263             violation_fn: self.violation_fn,
264             context: Context::UrlParser,
265         }.parse_url(input)
266     }
267 }
268 
269 impl<'a> Debug for ParseOptions<'a> {
fmt(&self, f: &mut Formatter) -> fmt::Result270     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
271         write!(f,
272                "ParseOptions {{ base_url: {:?}, encoding_override: {:?}, \
273                 violation_fn: {:?} }}",
274                self.base_url,
275                self.encoding_override,
276                self.violation_fn)
277     }
278 }
279 
280 impl Url {
281     /// Parse an absolute URL from a string.
282     ///
283     /// # Examples
284     ///
285     /// ```rust
286     /// use url::Url;
287     /// # use url::ParseError;
288     ///
289     /// # fn run() -> Result<(), ParseError> {
290     /// let url = Url::parse("https://example.net")?;
291     /// # Ok(())
292     /// # }
293     /// # run().unwrap();
294     /// ```
295     ///
296     /// # Errors
297     ///
298     /// If the function can not parse an absolute URL from the given string,
299     /// a [`ParseError`] variant will be returned.
300     ///
301     /// [`ParseError`]: enum.ParseError.html
302     #[inline]
parse(input: &str) -> Result<Url, ::ParseError>303     pub fn parse(input: &str) -> Result<Url, ::ParseError> {
304         Url::options().parse(input)
305     }
306 
307     /// Parse an absolute URL from a string and add params to its query string.
308     ///
309     /// Existing params are not removed.
310     ///
311     /// # Examples
312     ///
313     /// ```rust
314     /// use url::Url;
315     /// # use url::ParseError;
316     ///
317     /// # fn run() -> Result<(), ParseError> {
318     /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
319     ///                                  &[("lang", "rust"), ("browser", "servo")])?;
320     /// # Ok(())
321     /// # }
322     /// # run().unwrap();
323     /// ```
324     ///
325     /// # Errors
326     ///
327     /// If the function can not parse an absolute URL from the given string,
328     /// a [`ParseError`] variant will be returned.
329     ///
330     /// [`ParseError`]: enum.ParseError.html
331     #[inline]
parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, ::ParseError> where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str>332     pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, ::ParseError>
333         where I: IntoIterator,
334               I::Item: Borrow<(K, V)>,
335               K: AsRef<str>,
336               V: AsRef<str>
337     {
338         let mut url = Url::options().parse(input);
339 
340         if let Ok(ref mut url) = url {
341             url.query_pairs_mut().extend_pairs(iter);
342         }
343 
344         url
345     }
346 
347     /// Parse a string as an URL, with this URL as the base URL.
348     ///
349     /// Note: a trailing slash is significant.
350     /// Without it, the last path component is considered to be a “file” name
351     /// to be removed to get at the “directory” that is used as the base:
352     ///
353     /// # Examples
354     ///
355     /// ```rust
356     /// use url::Url;
357     /// # use url::ParseError;
358     ///
359     /// # fn run() -> Result<(), ParseError> {
360     /// let base = Url::parse("https://example.net/a/b.html")?;
361     /// let url = base.join("c.png")?;
362     /// assert_eq!(url.as_str(), "https://example.net/a/c.png");  // Not /a/b.html/c.png
363     ///
364     /// let base = Url::parse("https://example.net/a/b/")?;
365     /// let url = base.join("c.png")?;
366     /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
367     /// # Ok(())
368     /// # }
369     /// # run().unwrap();
370     /// ```
371     ///
372     /// # Errors
373     ///
374     /// If the function can not parse an URL from the given string
375     /// with this URL as the base URL, a [`ParseError`] variant will be returned.
376     ///
377     /// [`ParseError`]: enum.ParseError.html
378     #[inline]
join(&self, input: &str) -> Result<Url, ::ParseError>379     pub fn join(&self, input: &str) -> Result<Url, ::ParseError> {
380         Url::options().base_url(Some(self)).parse(input)
381     }
382 
383     /// Return a default `ParseOptions` that can fully configure the URL parser.
384     ///
385     /// # Examples
386     ///
387     /// Get default `ParseOptions`, then change base url
388     ///
389     /// ```rust
390     /// use url::Url;
391     /// # use url::ParseError;
392     /// # fn run() -> Result<(), ParseError> {
393     /// let options = Url::options();
394     /// let api = Url::parse("https://api.example.com")?;
395     /// let base_url = options.base_url(Some(&api));
396     /// let version_url = base_url.parse("version.json")?;
397     /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
398     /// # Ok(())
399     /// # }
400     /// # run().unwrap();
401     /// ```
options<'a>() -> ParseOptions<'a>402     pub fn options<'a>() -> ParseOptions<'a> {
403         ParseOptions {
404             base_url: None,
405             encoding_override: EncodingOverride::utf8(),
406             violation_fn: ViolationFn::NoOp,
407         }
408     }
409 
410     /// Return the serialization of this URL.
411     ///
412     /// This is fast since that serialization is already stored in the `Url` struct.
413     ///
414     /// # Examples
415     ///
416     /// ```rust
417     /// use url::Url;
418     /// # use url::ParseError;
419     ///
420     /// # fn run() -> Result<(), ParseError> {
421     /// let url_str = "https://example.net/";
422     /// let url = Url::parse(url_str)?;
423     /// assert_eq!(url.as_str(), url_str);
424     /// # Ok(())
425     /// # }
426     /// # run().unwrap();
427     /// ```
428     #[inline]
as_str(&self) -> &str429     pub fn as_str(&self) -> &str {
430         &self.serialization
431     }
432 
433     /// Return the serialization of this URL.
434     ///
435     /// This consumes the `Url` and takes ownership of the `String` stored in it.
436     ///
437     /// # Examples
438     ///
439     /// ```rust
440     /// use url::Url;
441     /// # use url::ParseError;
442     ///
443     /// # fn run() -> Result<(), ParseError> {
444     /// let url_str = "https://example.net/";
445     /// let url = Url::parse(url_str)?;
446     /// assert_eq!(url.into_string(), url_str);
447     /// # Ok(())
448     /// # }
449     /// # run().unwrap();
450     /// ```
451     #[inline]
into_string(self) -> String452     pub fn into_string(self) -> String {
453         self.serialization
454     }
455 
456     /// For internal testing, not part of the public API.
457     ///
458     /// Methods of the `Url` struct assume a number of invariants.
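    /// This checks each of these invariants and returns an `Err` describing
    /// the first one that is not met. It can still panic if the port text or
    /// the URL's own serialization fails to parse back.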
460     /// This is for testing rust-url itself.
461     #[doc(hidden)]
    pub fn check_invariants(&self) -> Result<(), String> {
        // These local macros shadow the standard `assert!` / `assert_eq!` for the
        // rest of this function: a failed check does not panic but returns early
        // with an `Err` containing the stringified check and the serialization.
        macro_rules! assert {
            ($x: expr) => {
                if !$x {
                    return Err(format!("!( {} ) for URL {:?}",
                                       stringify!($x), self.serialization))
                }
            }
        }

        macro_rules! assert_eq {
            ($a: expr, $b: expr) => {
                {
                    let a = $a;
                    let b = $b;
                    if a != b {
                        return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                           a, b, stringify!($a), stringify!($b),
                                           self.serialization))
                    }
                }
            }
        }

        // Scheme: at least one character, an ASCII letter first, then ASCII
        // letters, digits, '+', '-' or '.', and a ':' right after it.
        assert!(self.scheme_end >= 1);
        assert!(matches!(self.byte_at(0), b'a'...b'z' | b'A'...b'Z'));
        assert!(self.slice(1..self.scheme_end).chars()
                .all(|c| matches!(c, 'a'...'z' | 'A'...'Z' | '0'...'9' | '+' | '-' | '.')));
        assert_eq!(self.byte_at(self.scheme_end), b':');

        if self.slice(self.scheme_end + 1 ..).starts_with("//") {
            // URL with authority
            // The byte at `username_end` tells which userinfo shape is present:
            // ':' starts a password that must end with '@' just before the host,
            // '@' means a username only, and anything else means no userinfo at
            // all, so the (empty) username sits directly after "://".
            match self.byte_at(self.username_end) {
                b':' => {
                    assert!(self.host_start >= self.username_end + 2);
                    assert_eq!(self.byte_at(self.host_start - 1), b'@');
                }
                b'@' => assert!(self.host_start == self.username_end + 1),
                _ => assert_eq!(self.username_end, self.scheme_end + 3),
            }
            assert!(self.host_start >= self.username_end);
            assert!(self.host_end >= self.host_start);
            // The serialized host text must agree with the parsed `host` value.
            let host_str = self.slice(self.host_start..self.host_end);
            match self.host {
                HostInternal::None => assert_eq!(host_str, ""),
                HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
                HostInternal::Ipv6(address) => {
                    let h: Host<String> = Host::Ipv6(address);
                    assert_eq!(host_str, h.to_string())
                }
                HostInternal::Domain => {
                    // Only special schemes are required to have a non-empty domain.
                    if SchemeType::from(self.scheme()).is_special() {
                        assert!(!host_str.is_empty())
                    }
                }
            }
            // Nothing between host and path means no port; otherwise the gap must
            // be ':' followed by text that parses to the stored port.
            if self.path_start == self.host_end {
                assert_eq!(self.port, None);
            } else {
                assert_eq!(self.byte_at(self.host_end), b':');
                let port_str = self.slice(self.host_end + 1..self.path_start);
                // NOTE: `expect` panics (rather than returning `Err`) if the text is not a valid `u16`.
                assert_eq!(self.port, Some(port_str.parse::<u16>().expect("Couldn't parse port?")));
            }
            assert_eq!(self.byte_at(self.path_start), b'/');
        } else {
            // Anarchist URL (no authority)
            // Every authority-related position collapses to just after the ':'.
            assert_eq!(self.username_end, self.scheme_end + 1);
            assert_eq!(self.host_start, self.scheme_end + 1);
            assert_eq!(self.host_end, self.scheme_end + 1);
            assert_eq!(self.host, HostInternal::None);
            assert_eq!(self.port, None);
            assert_eq!(self.path_start, self.scheme_end + 1);
        }
        // Query and fragment, when present, start on their delimiter, come after
        // the start of the path, and the fragment comes after the query.
        if let Some(start) = self.query_start {
            assert!(start > self.path_start);
            assert_eq!(self.byte_at(start), b'?');
        }
        if let Some(start) = self.fragment_start {
            assert!(start > self.path_start);
            assert_eq!(self.byte_at(start), b'#');
        }
        if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
            assert!(fragment_start > query_start);
        }

        // Round trip: parsing our own serialization must yield the same string
        // and the same component positions. NOTE: `expect` panics if it fails to parse.
        let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
        assert_eq!(&self.serialization, &other.serialization);
        assert_eq!(self.scheme_end, other.scheme_end);
        assert_eq!(self.username_end, other.username_end);
        assert_eq!(self.host_start, other.host_start);
        assert_eq!(self.host_end, other.host_end);
        assert!(self.host == other.host ||
                // XXX No host round-trips to empty host.
                // See https://github.com/whatwg/url/issues/79
                (self.host_str(), other.host_str()) == (None, Some("")));
        assert_eq!(self.port, other.port);
        assert_eq!(self.path_start, other.path_start);
        assert_eq!(self.query_start, other.query_start);
        assert_eq!(self.fragment_start, other.fragment_start);
        Ok(())
    }
563 
564     /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
565     ///
566     /// Note: this returns an opaque origin for `file:` URLs, which causes
567     /// `url.origin() != url.origin()`.
568     ///
569     /// # Examples
570     ///
571     /// URL with `ftp` scheme:
572     ///
573     /// ```rust
574     /// use url::{Host, Origin, Url};
575     /// # use url::ParseError;
576     ///
577     /// # fn run() -> Result<(), ParseError> {
578     /// let url = Url::parse("ftp://example.com/foo")?;
579     /// assert_eq!(url.origin(),
580     ///            Origin::Tuple("ftp".into(),
581     ///                          Host::Domain("example.com".into()),
582     ///                          21));
583     /// # Ok(())
584     /// # }
585     /// # run().unwrap();
586     /// ```
587     ///
588     /// URL with `blob` scheme:
589     ///
590     /// ```rust
591     /// use url::{Host, Origin, Url};
592     /// # use url::ParseError;
593     ///
594     /// # fn run() -> Result<(), ParseError> {
595     /// let url = Url::parse("blob:https://example.com/foo")?;
596     /// assert_eq!(url.origin(),
597     ///            Origin::Tuple("https".into(),
598     ///                          Host::Domain("example.com".into()),
599     ///                          443));
600     /// # Ok(())
601     /// # }
602     /// # run().unwrap();
603     /// ```
604     ///
605     /// URL with `file` scheme:
606     ///
607     /// ```rust
608     /// use url::{Host, Origin, Url};
609     /// # use url::ParseError;
610     ///
611     /// # fn run() -> Result<(), ParseError> {
612     /// let url = Url::parse("file:///tmp/foo")?;
613     /// assert!(!url.origin().is_tuple());
614     ///
615     /// let other_url = Url::parse("file:///tmp/foo")?;
616     /// assert!(url.origin() != other_url.origin());
617     /// # Ok(())
618     /// # }
619     /// # run().unwrap();
620     /// ```
621     ///
622     /// URL with other scheme:
623     ///
624     /// ```rust
625     /// use url::{Host, Origin, Url};
626     /// # use url::ParseError;
627     ///
628     /// # fn run() -> Result<(), ParseError> {
629     /// let url = Url::parse("foo:bar")?;
630     /// assert!(!url.origin().is_tuple());
631     /// # Ok(())
632     /// # }
633     /// # run().unwrap();
634     /// ```
635     #[inline]
    pub fn origin(&self) -> Origin {
        // The computation lives in the private `origin` module; this method only forwards `self`.
        origin::url_origin(self)
    }
639 
640     /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
641     ///
642     /// # Examples
643     ///
644     /// ```
645     /// use url::Url;
646     /// # use url::ParseError;
647     ///
648     /// # fn run() -> Result<(), ParseError> {
649     /// let url = Url::parse("file:///tmp/foo")?;
650     /// assert_eq!(url.scheme(), "file");
651     /// # Ok(())
652     /// # }
653     /// # run().unwrap();
654     /// ```
655     #[inline]
    pub fn scheme(&self) -> &str {
        // `scheme_end` is the position of the ':' delimiter, so this open-start
        // range covers everything before it and leaves the ':' out.
        self.slice(..self.scheme_end)
    }
659 
660     /// Return whether the URL has an 'authority',
661     /// which can contain a username, password, host, and port number.
662     ///
663     /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
664     /// or cannot-be-a-base like `data:text/plain,Stuff`.
665     ///
666     /// # Examples
667     ///
668     /// ```
669     /// use url::Url;
670     /// # use url::ParseError;
671     ///
672     /// # fn run() -> Result<(), ParseError> {
673     /// let url = Url::parse("ftp://rms@example.com")?;
674     /// assert!(url.has_authority());
675     ///
676     /// let url = Url::parse("unix:/run/foo.socket")?;
677     /// assert!(!url.has_authority());
678     ///
679     /// let url = Url::parse("data:text/plain,Stuff")?;
680     /// assert!(!url.has_authority());
681     /// # Ok(())
682     /// # }
683     /// # run().unwrap();
684     /// ```
685     #[inline]
has_authority(&self) -> bool686     pub fn has_authority(&self) -> bool {
687         debug_assert!(self.byte_at(self.scheme_end) == b':');
688         self.slice(self.scheme_end..).starts_with("://")
689     }
690 
691     /// Return whether this URL is a cannot-be-a-base URL,
692     /// meaning that parsing a relative URL string with this URL as the base will return an error.
693     ///
694     /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
695     /// as is typically the case of `data:` and `mailto:` URLs.
696     ///
697     /// # Examples
698     ///
699     /// ```
700     /// use url::Url;
701     /// # use url::ParseError;
702     ///
703     /// # fn run() -> Result<(), ParseError> {
704     /// let url = Url::parse("ftp://rms@example.com")?;
705     /// assert!(!url.cannot_be_a_base());
706     ///
707     /// let url = Url::parse("unix:/run/foo.socket")?;
708     /// assert!(!url.cannot_be_a_base());
709     ///
710     /// let url = Url::parse("data:text/plain,Stuff")?;
711     /// assert!(url.cannot_be_a_base());
712     /// # Ok(())
713     /// # }
714     /// # run().unwrap();
715     /// ```
716     #[inline]
cannot_be_a_base(&self) -> bool717     pub fn cannot_be_a_base(&self) -> bool {
718         !self.slice(self.path_start..).starts_with('/')
719     }
720 
721     /// Return the username for this URL (typically the empty string)
722     /// as a percent-encoded ASCII string.
723     ///
724     /// # Examples
725     ///
726     /// ```
727     /// use url::Url;
728     /// # use url::ParseError;
729     ///
730     /// # fn run() -> Result<(), ParseError> {
731     /// let url = Url::parse("ftp://rms@example.com")?;
732     /// assert_eq!(url.username(), "rms");
733     ///
734     /// let url = Url::parse("ftp://:secret123@example.com")?;
735     /// assert_eq!(url.username(), "");
736     ///
737     /// let url = Url::parse("https://example.com")?;
738     /// assert_eq!(url.username(), "");
739     /// # Ok(())
740     /// # }
741     /// # run().unwrap();
742     /// ```
username(&self) -> &str743     pub fn username(&self) -> &str {
744         if self.has_authority() {
745             self.slice(self.scheme_end + ("://".len() as u32)..self.username_end)
746         } else {
747             ""
748         }
749     }
750 
751     /// Return the password for this URL, if any, as a percent-encoded ASCII string.
752     ///
753     /// # Examples
754     ///
755     /// ```
756     /// use url::Url;
757     /// # use url::ParseError;
758     ///
759     /// # fn run() -> Result<(), ParseError> {
760     /// let url = Url::parse("ftp://rms:secret123@example.com")?;
761     /// assert_eq!(url.password(), Some("secret123"));
762     ///
763     /// let url = Url::parse("ftp://:secret123@example.com")?;
764     /// assert_eq!(url.password(), Some("secret123"));
765     ///
766     /// let url = Url::parse("ftp://rms@example.com")?;
767     /// assert_eq!(url.password(), None);
768     ///
769     /// let url = Url::parse("https://example.com")?;
770     /// assert_eq!(url.password(), None);
771     /// # Ok(())
772     /// # }
773     /// # run().unwrap();
774     /// ```
password(&self) -> Option<&str>775     pub fn password(&self) -> Option<&str> {
776         // This ':' is not the one marking a port number since a host can not be empty.
777         // (Except for file: URLs, which do not have port numbers.)
778         if self.has_authority() && self.byte_at(self.username_end) == b':' {
779             debug_assert!(self.byte_at(self.host_start - 1) == b'@');
780             Some(self.slice(self.username_end + 1..self.host_start - 1))
781         } else {
782             None
783         }
784     }
785 
786     /// Equivalent to `url.host().is_some()`.
787     ///
788     /// # Examples
789     ///
790     /// ```
791     /// use url::Url;
792     /// # use url::ParseError;
793     ///
794     /// # fn run() -> Result<(), ParseError> {
795     /// let url = Url::parse("ftp://rms@example.com")?;
796     /// assert!(url.has_host());
797     ///
798     /// let url = Url::parse("unix:/run/foo.socket")?;
799     /// assert!(!url.has_host());
800     ///
801     /// let url = Url::parse("data:text/plain,Stuff")?;
802     /// assert!(!url.has_host());
803     /// # Ok(())
804     /// # }
805     /// # run().unwrap();
806     /// ```
has_host(&self) -> bool807     pub fn has_host(&self) -> bool {
808         !matches!(self.host, HostInternal::None)
809     }
810 
811     /// Return the string representation of the host (domain or IP address) for this URL, if any.
812     ///
813     /// Non-ASCII domains are punycode-encoded per IDNA.
814     /// IPv6 addresses are given between `[` and `]` brackets.
815     ///
816     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
817     /// don’t have a host.
818     ///
819     /// See also the `host` method.
820     ///
821     /// # Examples
822     ///
823     /// ```
824     /// use url::Url;
825     /// # use url::ParseError;
826     ///
827     /// # fn run() -> Result<(), ParseError> {
828     /// let url = Url::parse("https://127.0.0.1/index.html")?;
829     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
830     ///
831     /// let url = Url::parse("ftp://rms@example.com")?;
832     /// assert_eq!(url.host_str(), Some("example.com"));
833     ///
834     /// let url = Url::parse("unix:/run/foo.socket")?;
835     /// assert_eq!(url.host_str(), None);
836     ///
837     /// let url = Url::parse("data:text/plain,Stuff")?;
838     /// assert_eq!(url.host_str(), None);
839     /// # Ok(())
840     /// # }
841     /// # run().unwrap();
842     /// ```
host_str(&self) -> Option<&str>843     pub fn host_str(&self) -> Option<&str> {
844         if self.has_host() {
845             Some(self.slice(self.host_start..self.host_end))
846         } else {
847             None
848         }
849     }
850 
851     /// Return the parsed representation of the host for this URL.
852     /// Non-ASCII domain labels are punycode-encoded per IDNA.
853     ///
854     /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
855     /// don’t have a host.
856     ///
857     /// See also the `host_str` method.
858     ///
859     /// # Examples
860     ///
861     /// ```
862     /// use url::Url;
863     /// # use url::ParseError;
864     ///
865     /// # fn run() -> Result<(), ParseError> {
866     /// let url = Url::parse("https://127.0.0.1/index.html")?;
867     /// assert!(url.host().is_some());
868     ///
869     /// let url = Url::parse("ftp://rms@example.com")?;
870     /// assert!(url.host().is_some());
871     ///
872     /// let url = Url::parse("unix:/run/foo.socket")?;
873     /// assert!(url.host().is_none());
874     ///
875     /// let url = Url::parse("data:text/plain,Stuff")?;
876     /// assert!(url.host().is_none());
877     /// # Ok(())
878     /// # }
879     /// # run().unwrap();
880     /// ```
host(&self) -> Option<Host<&str>>881     pub fn host(&self) -> Option<Host<&str>> {
882         match self.host {
883             HostInternal::None => None,
884             HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
885             HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
886             HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
887         }
888     }
889 
890     /// If this URL has a host and it is a domain name (not an IP address), return it.
891     ///
892     /// # Examples
893     ///
894     /// ```
895     /// use url::Url;
896     /// # use url::ParseError;
897     ///
898     /// # fn run() -> Result<(), ParseError> {
899     /// let url = Url::parse("https://127.0.0.1/")?;
900     /// assert_eq!(url.domain(), None);
901     ///
902     /// let url = Url::parse("mailto:rms@example.net")?;
903     /// assert_eq!(url.domain(), None);
904     ///
905     /// let url = Url::parse("https://example.com/")?;
906     /// assert_eq!(url.domain(), Some("example.com"));
907     /// # Ok(())
908     /// # }
909     /// # run().unwrap();
910     /// ```
domain(&self) -> Option<&str>911     pub fn domain(&self) -> Option<&str> {
912         match self.host {
913             HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
914             _ => None,
915         }
916     }
917 
    /// Return the port number for this URL, if any.
    ///
    /// This is the port recorded in the URL itself; no scheme default is substituted.
    /// Use `Url::port_or_known_default` to fall back to the scheme's default port.
    ///
    /// Note that `Url::set_port` records `None` when it is given the known default port
    /// of the URL's scheme, so this method returns `None` after such a call.
    ///
    /// # Examples
    ///
    /// ```
    /// use url::Url;
    /// # use url::ParseError;
    ///
    /// # fn run() -> Result<(), ParseError> {
    /// let url = Url::parse("https://example.com")?;
    /// assert_eq!(url.port(), None);
    ///
    /// let url = Url::parse("ssh://example.com:22")?;
    /// assert_eq!(url.port(), Some(22));
    /// # Ok(())
    /// # }
    /// # run().unwrap();
    /// ```
    #[inline]
    pub fn port(&self) -> Option<u16> {
        self.port
    }
940 
941     /// Return the port number for this URL, or the default port number if it is known.
942     ///
943     /// This method only knows the default port number
944     /// of the `http`, `https`, `ws`, `wss`, `ftp`, and `gopher` schemes.
945     ///
946     /// For URLs in these schemes, this method always returns `Some(_)`.
947     /// For other schemes, it is the same as `Url::port()`.
948     ///
949     /// # Examples
950     ///
951     /// ```
952     /// use url::Url;
953     /// # use url::ParseError;
954     ///
955     /// # fn run() -> Result<(), ParseError> {
956     /// let url = Url::parse("foo://example.com")?;
957     /// assert_eq!(url.port_or_known_default(), None);
958     ///
959     /// let url = Url::parse("foo://example.com:1456")?;
960     /// assert_eq!(url.port_or_known_default(), Some(1456));
961     ///
962     /// let url = Url::parse("https://example.com")?;
963     /// assert_eq!(url.port_or_known_default(), Some(443));
964     /// # Ok(())
965     /// # }
966     /// # run().unwrap();
967     /// ```
968     #[inline]
port_or_known_default(&self) -> Option<u16>969     pub fn port_or_known_default(&self) -> Option<u16> {
970         self.port.or_else(|| parser::default_port(self.scheme()))
971     }
972 
973     /// If the URL has a host, return something that implements `ToSocketAddrs`.
974     ///
975     /// If the URL has no port number and the scheme’s default port number is not known
976     /// (see `Url::port_or_known_default`),
977     /// the closure is called to obtain a port number.
    /// Typically, this closure can match on the result of `Url::scheme`
979     /// to have per-scheme default port numbers,
980     /// and panic for schemes it’s not prepared to handle.
981     /// For example:
982     ///
983     /// ```rust
984     /// # use url::Url;
985     /// # use std::net::TcpStream;
986     /// # use std::io;
987     /// fn connect(url: &Url) -> io::Result<TcpStream> {
988     ///     TcpStream::connect(url.with_default_port(default_port)?)
989     /// }
990     ///
991     /// fn default_port(url: &Url) -> Result<u16, ()> {
992     ///     match url.scheme() {
993     ///         "git" => Ok(9418),
994     ///         "git+ssh" => Ok(22),
995     ///         "git+https" => Ok(443),
996     ///         "git+http" => Ok(80),
997     ///         _ => Err(()),
998     ///     }
999     /// }
1000     /// ```
with_default_port<F>(&self, f: F) -> io::Result<HostAndPort<&str>> where F: FnOnce(&Url) -> Result<u16, ()>1001     pub fn with_default_port<F>(&self, f: F) -> io::Result<HostAndPort<&str>>
1002     where F: FnOnce(&Url) -> Result<u16, ()> {
1003         Ok(HostAndPort {
1004             host: self.host()
1005                       .ok_or(())
1006                       .or_else(|()| io_error("URL has no host"))?,
1007             port: self.port_or_known_default()
1008                       .ok_or(())
1009                       .or_else(|()| f(self))
1010                       .or_else(|()| io_error("URL has no port number"))?
1011         })
1012     }
1013 
1014     /// Return the path for this URL, as a percent-encoded ASCII string.
1015     /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1016     /// For other URLs, this starts with a '/' slash
1017     /// and continues with slash-separated path segments.
1018     ///
1019     /// # Examples
1020     ///
1021     /// ```rust
1022     /// use url::{Url, ParseError};
1023     ///
1024     /// # fn run() -> Result<(), ParseError> {
1025     /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1026     /// assert_eq!(url.path(), "/api/versions");
1027     ///
1028     /// let url = Url::parse("https://example.com")?;
1029     /// assert_eq!(url.path(), "/");
1030     ///
1031     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1032     /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1033     /// # Ok(())
1034     /// # }
1035     /// # run().unwrap();
1036     /// ```
path(&self) -> &str1037     pub fn path(&self) -> &str {
1038         match (self.query_start, self.fragment_start) {
1039             (None, None) => self.slice(self.path_start..),
1040             (Some(next_component_start), _) |
1041             (None, Some(next_component_start)) => {
1042                 self.slice(self.path_start..next_component_start)
1043             }
1044         }
1045     }
1046 
1047     /// Unless this URL is cannot-be-a-base,
1048     /// return an iterator of '/' slash-separated path segments,
1049     /// each as a percent-encoded ASCII string.
1050     ///
1051     /// Return `None` for cannot-be-a-base URLs.
1052     ///
1053     /// When `Some` is returned, the iterator always contains at least one string
1054     /// (which may be empty).
1055     ///
1056     /// # Examples
1057     ///
1058     /// ```
1059     /// use url::Url;
1060     /// # use std::error::Error;
1061     ///
1062     /// # fn run() -> Result<(), Box<Error>> {
1063     /// let url = Url::parse("https://example.com/foo/bar")?;
1064     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1065     /// assert_eq!(path_segments.next(), Some("foo"));
1066     /// assert_eq!(path_segments.next(), Some("bar"));
1067     /// assert_eq!(path_segments.next(), None);
1068     ///
1069     /// let url = Url::parse("https://example.com")?;
1070     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1071     /// assert_eq!(path_segments.next(), Some(""));
1072     /// assert_eq!(path_segments.next(), None);
1073     ///
1074     /// let url = Url::parse("data:text/plain,HelloWorld")?;
1075     /// assert!(url.path_segments().is_none());
1076     ///
1077     /// let url = Url::parse("https://example.com/countries/việt nam")?;
1078     /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1079     /// assert_eq!(path_segments.next(), Some("countries"));
1080     /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1081     /// # Ok(())
1082     /// # }
1083     /// # run().unwrap();
1084     /// ```
path_segments(&self) -> Option<str::Split<char>>1085     pub fn path_segments(&self) -> Option<str::Split<char>> {
1086         let path = self.path();
1087         if path.starts_with('/') {
1088             Some(path[1..].split('/'))
1089         } else {
1090             None
1091         }
1092     }
1093 
1094     /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1095     ///
1096     /// # Examples
1097     ///
1098     /// ```rust
1099     /// use url::Url;
1100     /// # use url::ParseError;
1101     ///
    /// # fn run() -> Result<(), ParseError> {
1103     /// let url = Url::parse("https://example.com/products?page=2")?;
1104     /// let query = url.query();
1105     /// assert_eq!(query, Some("page=2"));
1106     ///
1107     /// let url = Url::parse("https://example.com/products")?;
1108     /// let query = url.query();
1109     /// assert!(query.is_none());
1110     ///
1111     /// let url = Url::parse("https://example.com/?country=español")?;
1112     /// let query = url.query();
1113     /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1114     /// # Ok(())
1115     /// # }
1116     /// # run().unwrap();
1117     /// ```
query(&self) -> Option<&str>1118     pub fn query(&self) -> Option<&str> {
1119         match (self.query_start, self.fragment_start) {
1120             (None, _) => None,
1121             (Some(query_start), None) => {
1122                 debug_assert!(self.byte_at(query_start) == b'?');
1123                 Some(self.slice(query_start + 1..))
1124             }
1125             (Some(query_start), Some(fragment_start)) => {
1126                 debug_assert!(self.byte_at(query_start) == b'?');
1127                 Some(self.slice(query_start + 1..fragment_start))
1128             }
1129         }
1130     }
1131 
1132     /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1133     /// and return an iterator of (key, value) pairs.
1134     ///
1135     /// # Examples
1136     ///
1137     /// ```rust
1138     /// use std::borrow::Cow;
1139     ///
1140     /// use url::Url;
1141     /// # use url::ParseError;
1142     ///
1143     /// # fn run() -> Result<(), ParseError> {
1144     /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1145     /// let mut pairs = url.query_pairs();
1146     ///
1147     /// assert_eq!(pairs.count(), 2);
1148     ///
1149     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1150     /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1151     /// # Ok(())
1152     /// # }
1153     /// # run().unwrap();
    /// ```
1156     #[inline]
query_pairs(&self) -> form_urlencoded::Parse1157     pub fn query_pairs(&self) -> form_urlencoded::Parse {
1158         form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1159     }
1160 
1161     /// Return this URL’s fragment identifier, if any.
1162     ///
1163     /// A fragment is the part of the URL after the `#` symbol.
1164     /// The fragment is optional and, if present, contains a fragment identifier
1165     /// that identifies a secondary resource, such as a section heading
1166     /// of a document.
1167     ///
    /// In HTML, the fragment identifier is usually the id attribute of an element
1169     /// that is scrolled to on load. Browsers typically will not send the fragment portion
1170     /// of a URL to the server.
1171     ///
1172     /// **Note:** the parser did *not* percent-encode this component,
1173     /// but the input may have been percent-encoded already.
1174     ///
1175     /// # Examples
1176     ///
1177     /// ```rust
1178     /// use url::Url;
1179     /// # use url::ParseError;
1180     ///
1181     /// # fn run() -> Result<(), ParseError> {
1182     /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1183     ///
1184     /// assert_eq!(url.fragment(), Some("row=4"));
1185     ///
1186     /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1187     ///
1188     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1189     /// # Ok(())
1190     /// # }
1191     /// # run().unwrap();
1192     /// ```
fragment(&self) -> Option<&str>1193     pub fn fragment(&self) -> Option<&str> {
1194         self.fragment_start.map(|start| {
1195             debug_assert!(self.byte_at(start) == b'#');
1196             self.slice(start + 1..)
1197         })
1198     }
1199 
mutate<F: FnOnce(&mut Parser) -> R, R>(&mut self, f: F) -> R1200     fn mutate<F: FnOnce(&mut Parser) -> R, R>(&mut self, f: F) -> R {
1201         let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new()));
1202         let result = f(&mut parser);
1203         self.serialization = parser.serialization;
1204         result
1205     }
1206 
1207     /// Change this URL’s fragment identifier.
1208     ///
1209     /// # Examples
1210     ///
1211     /// ```rust
1212     /// use url::Url;
1213     /// # use url::ParseError;
1214     ///
1215     /// # fn run() -> Result<(), ParseError> {
1216     /// let mut url = Url::parse("https://example.com/data.csv")?;
    /// assert_eq!(url.as_str(), "https://example.com/data.csv");
    ///
    /// url.set_fragment(Some("cell=4,1-6,2"));
1220     /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1221     /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1222     ///
1223     /// url.set_fragment(None);
1224     /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1225     /// assert!(url.fragment().is_none());
1226     /// # Ok(())
1227     /// # }
1228     /// # run().unwrap();
1229     /// ```
set_fragment(&mut self, fragment: Option<&str>)1230     pub fn set_fragment(&mut self, fragment: Option<&str>) {
1231         // Remove any previous fragment
1232         if let Some(start) = self.fragment_start {
1233             debug_assert!(self.byte_at(start) == b'#');
1234             self.serialization.truncate(start as usize);
1235         }
1236         // Write the new one
1237         if let Some(input) = fragment {
1238             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1239             self.serialization.push('#');
1240             self.mutate(|parser| parser.parse_fragment(parser::Input::new(input)))
1241         } else {
1242             self.fragment_start = None
1243         }
1244     }
1245 
take_fragment(&mut self) -> Option<String>1246     fn take_fragment(&mut self) -> Option<String> {
1247         self.fragment_start.take().map(|start| {
1248             debug_assert!(self.byte_at(start) == b'#');
1249             let fragment = self.slice(start + 1..).to_owned();
1250             self.serialization.truncate(start as usize);
1251             fragment
1252         })
1253     }
1254 
restore_already_parsed_fragment(&mut self, fragment: Option<String>)1255     fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1256         if let Some(ref fragment) = fragment {
1257             assert!(self.fragment_start.is_none());
1258             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1259             self.serialization.push('#');
1260             self.serialization.push_str(fragment);
1261         }
1262     }
1263 
1264     /// Change this URL’s query string.
1265     ///
1266     /// # Examples
1267     ///
1268     /// ```rust
1269     /// use url::Url;
1270     /// # use url::ParseError;
1271     ///
1272     /// # fn run() -> Result<(), ParseError> {
1273     /// let mut url = Url::parse("https://example.com/products")?;
1274     /// assert_eq!(url.as_str(), "https://example.com/products");
1275     ///
1276     /// url.set_query(Some("page=2"));
1277     /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1278     /// assert_eq!(url.query(), Some("page=2"));
1279     /// # Ok(())
1280     /// # }
1281     /// # run().unwrap();
1282     /// ```
set_query(&mut self, query: Option<&str>)1283     pub fn set_query(&mut self, query: Option<&str>) {
1284         let fragment = self.take_fragment();
1285 
1286         // Remove any previous query
1287         if let Some(start) = self.query_start.take() {
1288             debug_assert!(self.byte_at(start) == b'?');
1289             self.serialization.truncate(start as usize);
1290         }
1291         // Write the new query, if any
1292         if let Some(input) = query {
1293             self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1294             self.serialization.push('?');
1295             let scheme_end = self.scheme_end;
1296             self.mutate(|parser| parser.parse_query(scheme_end, parser::Input::new(input)));
1297         }
1298 
1299         self.restore_already_parsed_fragment(fragment);
1300     }
1301 
1302     /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1303     /// in `application/x-www-form-urlencoded` syntax.
1304     ///
1305     /// The return value has a method-chaining API:
1306     ///
1307     /// ```rust
1308     /// # use url::{Url, ParseError};
1309     ///
1310     /// # fn run() -> Result<(), ParseError> {
1311     /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1312     /// assert_eq!(url.query(), Some("lang=fr"));
1313     ///
1314     /// url.query_pairs_mut().append_pair("foo", "bar");
1315     /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1316     /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1317     ///
1318     /// url.query_pairs_mut()
1319     ///     .clear()
1320     ///     .append_pair("foo", "bar & baz")
1321     ///     .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1322     /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1323     /// assert_eq!(url.as_str(),
1324     ///            "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1325     /// # Ok(())
1326     /// # }
1327     /// # run().unwrap();
1328     /// ```
1329     ///
1330     /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1331     /// not `url.set_query(None)`.
1332     ///
1333     /// The state of `Url` is unspecified if this return value is leaked without being dropped.
query_pairs_mut(&mut self) -> form_urlencoded::Serializer<UrlQuery>1334     pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<UrlQuery> {
1335         let fragment = self.take_fragment();
1336 
1337         let query_start;
1338         if let Some(start) = self.query_start {
1339             debug_assert!(self.byte_at(start) == b'?');
1340             query_start = start as usize;
1341         } else {
1342             query_start = self.serialization.len();
1343             self.query_start = Some(to_u32(query_start).unwrap());
1344             self.serialization.push('?');
1345         }
1346 
1347         let query = UrlQuery { url: Some(self), fragment: fragment };
1348         form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1349     }
1350 
take_after_path(&mut self) -> String1351     fn take_after_path(&mut self) -> String {
1352         match (self.query_start, self.fragment_start) {
1353             (Some(i), _) | (None, Some(i)) => {
1354                 let after_path = self.slice(i..).to_owned();
1355                 self.serialization.truncate(i as usize);
1356                 after_path
1357             },
1358             (None, None) => String::new(),
1359         }
1360     }
1361 
1362     /// Change this URL’s path.
1363     ///
1364     /// # Examples
1365     ///
1366     /// ```rust
1367     /// use url::Url;
1368     /// # use url::ParseError;
1369     ///
1370     /// # fn run() -> Result<(), ParseError> {
1371     /// let mut url = Url::parse("https://example.com")?;
1372     /// url.set_path("api/comments");
1373     /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1374     /// assert_eq!(url.path(), "/api/comments");
1375     ///
1376     /// let mut url = Url::parse("https://example.com/api")?;
1377     /// url.set_path("data/report.csv");
1378     /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1379     /// assert_eq!(url.path(), "/data/report.csv");
1380     /// # Ok(())
1381     /// # }
1382     /// # run().unwrap();
1383     /// ```
set_path(&mut self, mut path: &str)1384     pub fn set_path(&mut self, mut path: &str) {
1385         let after_path = self.take_after_path();
1386         let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1387         let cannot_be_a_base = self.cannot_be_a_base();
1388         let scheme_type = SchemeType::from(self.scheme());
1389         self.serialization.truncate(self.path_start as usize);
1390         self.mutate(|parser| {
1391             if cannot_be_a_base {
1392                 if path.starts_with('/') {
1393                     parser.serialization.push_str("%2F");
1394                     path = &path[1..];
1395                 }
1396                 parser.parse_cannot_be_a_base_path(parser::Input::new(path));
1397             } else {
1398                 let mut has_host = true;  // FIXME
1399                 parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
1400             }
1401         });
1402         self.restore_after_path(old_after_path_pos, &after_path);
1403     }
1404 
1405     /// Return an object with methods to manipulate this URL’s path segments.
1406     ///
1407     /// Return `Err(())` if this URL is cannot-be-a-base.
path_segments_mut(&mut self) -> Result<PathSegmentsMut, ()>1408     pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut, ()> {
1409         if self.cannot_be_a_base() {
1410             Err(())
1411         } else {
1412             Ok(path_segments::new(self))
1413         }
1414     }
1415 
restore_after_path(&mut self, old_after_path_position: u32, after_path: &str)1416     fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1417         let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1418         let adjust = |index: &mut u32| {
1419             *index -= old_after_path_position;
1420             *index += new_after_path_position;
1421         };
1422         if let Some(ref mut index) = self.query_start { adjust(index) }
1423         if let Some(ref mut index) = self.fragment_start { adjust(index) }
1424         self.serialization.push_str(after_path)
1425     }
1426 
1427     /// Change this URL’s port number.
1428     ///
    /// If this URL is cannot-be-a-base, has no host (or an empty one), or has the `file` scheme,
    /// do nothing and return `Err`.
1431     ///
1432     /// # Examples
1433     ///
1434     /// ```
1435     /// use url::Url;
1436     /// # use std::error::Error;
1437     ///
1438     /// # fn run() -> Result<(), Box<Error>> {
1439     /// let mut url = Url::parse("ssh://example.net:2048/")?;
1440     ///
1441     /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1442     /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1443     ///
1444     /// url.set_port(None).map_err(|_| "cannot be base")?;
1445     /// assert_eq!(url.as_str(), "ssh://example.net/");
1446     /// # Ok(())
1447     /// # }
1448     /// # run().unwrap();
1449     /// ```
1450     ///
1451     /// Cannot set port for cannot-be-a-base URLs:
1452     ///
1453     /// ```
1454     /// use url::Url;
1455     /// # use url::ParseError;
1456     ///
1457     /// # fn run() -> Result<(), ParseError> {
1458     /// let mut url = Url::parse("mailto:rms@example.net")?;
1459     ///
1460     /// let result = url.set_port(Some(80));
1461     /// assert!(result.is_err());
1462     ///
1463     /// let result = url.set_port(None);
1464     /// assert!(result.is_err());
1465     /// # Ok(())
1466     /// # }
1467     /// # run().unwrap();
1468     /// ```
set_port(&mut self, mut port: Option<u16>) -> Result<(), ()>1469     pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1470         // has_host implies !cannot_be_a_base
1471         if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1472             return Err(())
1473         }
1474         if port.is_some() && port == parser::default_port(self.scheme()) {
1475             port = None
1476         }
1477         self.set_port_internal(port);
1478         Ok(())
1479     }
1480 
set_port_internal(&mut self, port: Option<u16>)1481     fn set_port_internal(&mut self, port: Option<u16>) {
1482         match (self.port, port) {
1483             (None, None) => {}
1484             (Some(_), None) => {
1485                 self.serialization.drain(self.host_end as usize .. self.path_start as usize);
1486                 let offset = self.path_start - self.host_end;
1487                 self.path_start = self.host_end;
1488                 if let Some(ref mut index) = self.query_start { *index -= offset }
1489                 if let Some(ref mut index) = self.fragment_start { *index -= offset }
1490             }
1491             (Some(old), Some(new)) if old == new => {}
1492             (_, Some(new)) => {
1493                 let path_and_after = self.slice(self.path_start..).to_owned();
1494                 self.serialization.truncate(self.host_end as usize);
1495                 write!(&mut self.serialization, ":{}", new).unwrap();
1496                 let old_path_start = self.path_start;
1497                 let new_path_start = to_u32(self.serialization.len()).unwrap();
1498                 self.path_start = new_path_start;
1499                 let adjust = |index: &mut u32| {
1500                     *index -= old_path_start;
1501                     *index += new_path_start;
1502                 };
1503                 if let Some(ref mut index) = self.query_start { adjust(index) }
1504                 if let Some(ref mut index) = self.fragment_start { adjust(index) }
1505                 self.serialization.push_str(&path_and_after);
1506             }
1507         }
1508         self.port = port;
1509     }
1510 
1511     /// Change this URL’s host.
1512     ///
1513     /// Removing the host (calling this with `None`)
1514     /// will also remove any username, password, and port number.
1515     ///
1516     /// # Examples
1517     ///
1518     /// Change host:
1519     ///
1520     /// ```
1521     /// use url::Url;
1522     /// # use url::ParseError;
1523     ///
1524     /// # fn run() -> Result<(), ParseError> {
1525     /// let mut url = Url::parse("https://example.net")?;
1526     /// let result = url.set_host(Some("rust-lang.org"));
1527     /// assert!(result.is_ok());
1528     /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1529     /// # Ok(())
1530     /// # }
1531     /// # run().unwrap();
1532     /// ```
1533     ///
1534     /// Remove host:
1535     ///
1536     /// ```
1537     /// use url::Url;
1538     /// # use url::ParseError;
1539     ///
1540     /// # fn run() -> Result<(), ParseError> {
1541     /// let mut url = Url::parse("foo://example.net")?;
1542     /// let result = url.set_host(None);
1543     /// assert!(result.is_ok());
1544     /// assert_eq!(url.as_str(), "foo:/");
1545     /// # Ok(())
1546     /// # }
1547     /// # run().unwrap();
1548     /// ```
1549     ///
1550     /// Cannot remove host for 'special' schemes (e.g. `http`):
1551     ///
1552     /// ```
1553     /// use url::Url;
1554     /// # use url::ParseError;
1555     ///
1556     /// # fn run() -> Result<(), ParseError> {
1557     /// let mut url = Url::parse("https://example.net")?;
1558     /// let result = url.set_host(None);
1559     /// assert!(result.is_err());
1560     /// assert_eq!(url.as_str(), "https://example.net/");
1561     /// # Ok(())
1562     /// # }
1563     /// # run().unwrap();
1564     /// ```
1565     ///
1566     /// Cannot change or remove host for cannot-be-a-base URLs:
1567     ///
1568     /// ```
1569     /// use url::Url;
1570     /// # use url::ParseError;
1571     ///
1572     /// # fn run() -> Result<(), ParseError> {
1573     /// let mut url = Url::parse("mailto:rms@example.net")?;
1574     ///
1575     /// let result = url.set_host(Some("rust-lang.org"));
1576     /// assert!(result.is_err());
1577     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1578     ///
1579     /// let result = url.set_host(None);
1580     /// assert!(result.is_err());
1581     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1582     /// # Ok(())
1583     /// # }
1584     /// # run().unwrap();
1585     /// ```
1586     ///
1587     /// # Errors
1588     ///
    /// If this URL is cannot-be-a-base, if it has a 'special' scheme (e.g. `http`) and `host`
    /// is empty or would remove the existing host, or if there is an error parsing the given
    /// `host`, a [`ParseError`] variant will be returned.
1591     ///
1592     /// [`ParseError`]: enum.ParseError.html
set_host(&mut self, host: Option<&str>) -> Result<(), ParseError>1593     pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
1594         if self.cannot_be_a_base() {
1595             return Err(ParseError::SetHostOnCannotBeABaseUrl)
1596         }
1597 
1598         if let Some(host) = host {
1599             if host == "" && SchemeType::from(self.scheme()).is_special() {
1600                 return Err(ParseError::EmptyHost);
1601             }
1602             if SchemeType::from(self.scheme()).is_special() {
1603                 self.set_host_internal(Host::parse(host)?, None)
1604             } else {
1605                 self.set_host_internal(Host::parse_opaque(host)?, None)
1606             }
1607         } else if self.has_host() {
1608             if SchemeType::from(self.scheme()).is_special() {
1609                 return Err(ParseError::EmptyHost)
1610             }
1611             debug_assert!(self.byte_at(self.scheme_end) == b':');
1612             debug_assert!(self.byte_at(self.path_start) == b'/');
1613             let new_path_start = self.scheme_end + 1;
1614             self.serialization.drain(new_path_start as usize..self.path_start as usize);
1615             let offset = self.path_start - new_path_start;
1616             self.path_start = new_path_start;
1617             self.username_end = new_path_start;
1618             self.host_start = new_path_start;
1619             self.host_end = new_path_start;
1620             self.port = None;
1621             if let Some(ref mut index) = self.query_start { *index -= offset }
1622             if let Some(ref mut index) = self.fragment_start { *index -= offset }
1623         }
1624         Ok(())
1625     }
1626 
    /// Write `host` into the serialization in place of the current host and update the
    /// indices of the components that follow it.
    ///
    /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
    fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
        // The suffix is the text that is kept verbatim after the host. When the port is
        // being decided here it starts at the path; otherwise it starts right after the
        // old host, so whatever lies between host and path is carried over.
        let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { self.host_end };
        let suffix = self.slice(old_suffix_pos..).to_owned();
        self.serialization.truncate(self.host_start as usize);
        if !self.has_authority() {
            // Only ":" lies between the scheme and `host_start` (asserted below), so add
            // the "//" and shift the two indices that point just past it.
            debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
            debug_assert!(self.username_end == self.host_start);
            self.serialization.push('/');
            self.serialization.push('/');
            self.username_end += 2;
            self.host_start += 2;
        }
        // Serialize the new host and record where it ends.
        write!(&mut self.serialization, "{}", host).unwrap();
        self.host_end = to_u32(self.serialization.len()).unwrap();
        self.host = host.into();

        if let Some(new_port) = opt_new_port {
            self.port = new_port;
            if let Some(port) = new_port {
                write!(&mut self.serialization, ":{}", port).unwrap();
            }
        }
        let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
        self.serialization.push_str(&suffix);

        // Everything inside the suffix moved by the same distance; shift its indices.
        let adjust = |index: &mut u32| {
            *index -= old_suffix_pos;
            *index += new_suffix_pos;
        };
        adjust(&mut self.path_start);
        if let Some(ref mut index) = self.query_start { adjust(index) }
        if let Some(ref mut index) = self.fragment_start { adjust(index) }
    }
1661 
1662     /// Change this URL’s host to the given IP address.
1663     ///
1664     /// If this URL is cannot-be-a-base, do nothing and return `Err`.
1665     ///
1666     /// Compared to `Url::set_host`, this skips the host parser.
1667     ///
1668     /// # Examples
1669     ///
1670     /// ```rust
1671     /// use url::{Url, ParseError};
1672     ///
1673     /// # fn run() -> Result<(), ParseError> {
1674     /// let mut url = Url::parse("http://example.com")?;
1675     /// url.set_ip_host("127.0.0.1".parse().unwrap());
1676     /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1677     /// assert_eq!(url.as_str(), "http://127.0.0.1/");
1678     /// # Ok(())
1679     /// # }
1680     /// # run().unwrap();
1681     /// ```
1682     ///
    /// A cannot-be-a-base URL (such as a `mailto:` URL) cannot be given an IP host:
1684     ///
1685     /// ```rust
1686     /// use url::{Url, ParseError};
1687     ///
1688     /// # fn run() -> Result<(), ParseError> {
1689     /// let mut url = Url::parse("mailto:rms@example.com")?;
1690     /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
1691     ///
1692     /// assert_eq!(url.as_str(), "mailto:rms@example.com");
1693     /// assert!(result.is_err());
1694     /// # Ok(())
1695     /// # }
1696     /// # run().unwrap();
1697     /// ```
1698     ///
set_ip_host(&mut self, address: IpAddr) -> Result<(), ()>1699     pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
1700         if self.cannot_be_a_base() {
1701             return Err(())
1702         }
1703 
1704         let address = match address {
1705             IpAddr::V4(address) => Host::Ipv4(address),
1706             IpAddr::V6(address) => Host::Ipv6(address),
1707         };
1708         self.set_host_internal(address, None);
1709         Ok(())
1710     }
1711 
1712     /// Change this URL’s password.
1713     ///
    /// If this URL is cannot-be-a-base, does not have a host, has an empty host,
    /// or uses the `file` scheme, do nothing and return `Err`.
1715     ///
1716     /// # Examples
1717     ///
1718     /// ```rust
1719     /// use url::{Url, ParseError};
1720     ///
1721     /// # fn run() -> Result<(), ParseError> {
1722     /// let mut url = Url::parse("mailto:rmz@example.com")?;
1723     /// let result = url.set_password(Some("secret_password"));
1724     /// assert!(result.is_err());
1725     ///
1726     /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
1727     /// let result = url.set_password(Some("secret_password"));
1728     /// assert_eq!(url.password(), Some("secret_password"));
1729     ///
1730     /// let mut url = Url::parse("ftp://user2:@example.com")?;
1731     /// let result = url.set_password(Some("secret2"));
1732     /// assert!(result.is_ok());
1733     /// assert_eq!(url.password(), Some("secret2"));
1734     /// # Ok(())
1735     /// # }
1736     /// # run().unwrap();
1737     /// ```
    pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
        // has_host implies !cannot_be_a_base
        // URLs with an empty domain host or the `file` scheme are rejected as well.
        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
            return Err(())
        }
        if let Some(password) = password {
            // Save the host and everything after it, then rebuild ":password@" after the username.
            let host_and_after = self.slice(self.host_start..).to_owned();
            self.serialization.truncate(self.username_end as usize);
            self.serialization.push(':');
            self.serialization.extend(utf8_percent_encode(password, USERINFO_ENCODE_SET));
            self.serialization.push('@');

            // Every index at or after the host moves by the change in the host's position.
            let old_host_start = self.host_start;
            let new_host_start = to_u32(self.serialization.len()).unwrap();
            let adjust = |index: &mut u32| {
                *index -= old_host_start;
                *index += new_host_start;
            };
            self.host_start = new_host_start;
            adjust(&mut self.host_end);
            adjust(&mut self.path_start);
            if let Some(ref mut index) = self.query_start { adjust(index) }
            if let Some(ref mut index) = self.fragment_start { adjust(index) }

            self.serialization.push_str(&host_and_after);
        } else if self.byte_at(self.username_end) == b':' {  // If there is a password to remove
            let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
            debug_assert!(has_username_or_password);
            // The username starts right after "://".
            let username_start = self.scheme_end + 3;
            let empty_username = username_start == self.username_end;
            let start = self.username_end;  // Remove the ':'
            let end = if empty_username {
                self.host_start // Remove the '@' as well
            } else {
                self.host_start - 1  // Keep the '@' to separate the username from the host
            };
            self.serialization.drain(start as usize .. end as usize);
            // Shift every index after the removed range left by its length.
            let offset = end - start;
            self.host_start -= offset;
            self.host_end -= offset;
            self.path_start -= offset;
            if let Some(ref mut index) = self.query_start { *index -= offset }
            if let Some(ref mut index) = self.fragment_start { *index -= offset }
        }
        Ok(())
    }
1784 
1785     /// Change this URL’s username.
1786     ///
    /// If this URL is cannot-be-a-base, does not have a host, has an empty host,
    /// or uses the `file` scheme, do nothing and return `Err`.
1788     /// # Examples
1789     ///
    /// A cannot-be-a-base URL (such as a `mailto:` URL) cannot be given a username:
1791     ///
1792     /// ```rust
1793     /// use url::{Url, ParseError};
1794     ///
1795     /// # fn run() -> Result<(), ParseError> {
1796     /// let mut url = Url::parse("mailto:rmz@example.com")?;
1797     /// let result = url.set_username("user1");
1798     /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
1799     /// assert!(result.is_err());
1800     /// # Ok(())
1801     /// # }
1802     /// # run().unwrap();
1803     /// ```
1804     ///
    /// Set the username to `user1`:
1806     ///
1807     /// ```rust
1808     /// use url::{Url, ParseError};
1809     ///
1810     /// # fn run() -> Result<(), ParseError> {
1811     /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
1812     /// let result = url.set_username("user1");
1813     /// assert!(result.is_ok());
1814     /// assert_eq!(url.username(), "user1");
1815     /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
1816     /// # Ok(())
1817     /// # }
1818     /// # run().unwrap();
1819     /// ```
    pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
        // has_host implies !cannot_be_a_base
        // URLs with an empty domain host or the `file` scheme are rejected as well.
        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
            return Err(())
        }
        // The username starts right after "://".
        let username_start = self.scheme_end + 3;
        debug_assert!(self.slice(self.scheme_end..username_start) == "://");
        // Nothing to do when the serialized username already equals the requested one.
        if self.slice(username_start..self.username_end) == username {
            return Ok(())
        }
        // Save everything after the old username, then write the percent-encoded new one.
        let after_username = self.slice(self.username_end..).to_owned();
        self.serialization.truncate(username_start as usize);
        self.serialization.extend(utf8_percent_encode(username, USERINFO_ENCODE_SET));

        // `removed_bytes` / `added_bytes` are the old and new end positions of the username
        // (plus one when an '@' is dropped or inserted); later indices shift by their difference.
        let mut removed_bytes = self.username_end;
        self.username_end = to_u32(self.serialization.len()).unwrap();
        let mut added_bytes = self.username_end;

        let new_username_is_empty = self.username_end == username_start;
        match (new_username_is_empty, after_username.chars().next()) {
            // The username becomes empty and there was no password: drop the now-useless '@'.
            (true, Some('@')) => {
                removed_bytes += 1;
                self.serialization.push_str(&after_username[1..]);
            }
            // A separator is already in place (or none is needed): keep the remainder as is.
            (false, Some('@')) | (_, Some(':')) | (true, _) => {
                self.serialization.push_str(&after_username);
            }
            // There was no userinfo before: insert the '@' between the username and the host.
            (false, _) => {
                added_bytes += 1;
                self.serialization.push('@');
                self.serialization.push_str(&after_username);
            }
        }

        let adjust = |index: &mut u32| {
            *index -= removed_bytes;
            *index += added_bytes;
        };
        adjust(&mut self.host_start);
        adjust(&mut self.host_end);
        adjust(&mut self.path_start);
        if let Some(ref mut index) = self.query_start { adjust(index) }
        if let Some(ref mut index) = self.fragment_start { adjust(index) }
        Ok(())
    }
1865 
1866     /// Change this URL’s scheme.
1867     ///
1868     /// Do nothing and return `Err` if:
1869     ///
1870     /// * The new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
1871     /// * This URL is cannot-be-a-base and the new scheme is one of
1872     ///   `http`, `https`, `ws`, `wss`, `ftp`, or `gopher`
1873     ///
1874     /// # Examples
1875     ///
1876     /// Change the URL’s scheme from `https` to `foo`:
1877     ///
1878     /// ```
1879     /// use url::Url;
1880     /// # use url::ParseError;
1881     ///
1882     /// # fn run() -> Result<(), ParseError> {
1883     /// let mut url = Url::parse("https://example.net")?;
1884     /// let result = url.set_scheme("foo");
1885     /// assert_eq!(url.as_str(), "foo://example.net/");
1886     /// assert!(result.is_ok());
1887     /// # Ok(())
1888     /// # }
1889     /// # run().unwrap();
1890     /// ```
1891     ///
1892     ///
1893     /// Cannot change URL’s scheme from `https` to `foõ`:
1894     ///
1895     /// ```
1896     /// use url::Url;
1897     /// # use url::ParseError;
1898     ///
1899     /// # fn run() -> Result<(), ParseError> {
1900     /// let mut url = Url::parse("https://example.net")?;
1901     /// let result = url.set_scheme("foõ");
1902     /// assert_eq!(url.as_str(), "https://example.net/");
1903     /// assert!(result.is_err());
1904     /// # Ok(())
1905     /// # }
1906     /// # run().unwrap();
1907     /// ```
1908     ///
1909     /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
1910     ///
1911     /// ```
1912     /// use url::Url;
1913     /// # use url::ParseError;
1914     ///
1915     /// # fn run() -> Result<(), ParseError> {
1916     /// let mut url = Url::parse("mailto:rms@example.net")?;
1917     /// let result = url.set_scheme("https");
1918     /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1919     /// assert!(result.is_err());
1920     /// # Ok(())
1921     /// # }
1922     /// # run().unwrap();
1923     /// ```
set_scheme(&mut self, scheme: &str) -> Result<(), ()>1924     pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
1925         let mut parser = Parser::for_setter(String::new());
1926         let remaining = parser.parse_scheme(parser::Input::new(scheme))?;
1927         if !remaining.is_empty() ||
1928                 (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) {
1929             return Err(())
1930         }
1931         let old_scheme_end = self.scheme_end;
1932         let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
1933         let adjust = |index: &mut u32| {
1934             *index -= old_scheme_end;
1935             *index += new_scheme_end;
1936         };
1937 
1938         self.scheme_end = new_scheme_end;
1939         adjust(&mut self.username_end);
1940         adjust(&mut self.host_start);
1941         adjust(&mut self.host_end);
1942         adjust(&mut self.path_start);
1943         if let Some(ref mut index) = self.query_start { adjust(index) }
1944         if let Some(ref mut index) = self.fragment_start { adjust(index) }
1945 
1946         parser.serialization.push_str(self.slice(old_scheme_end..));
1947         self.serialization = parser.serialization;
1948         Ok(())
1949     }
1950 
1951     /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
1952     ///
1953     /// This returns `Err` if the given path is not absolute or,
1954     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
1955     ///
1956     /// # Examples
1957     ///
1958     /// On Unix-like platforms:
1959     ///
1960     /// ```
1961     /// # if cfg!(unix) {
1962     /// use url::Url;
1963     ///
1964     /// # fn run() -> Result<(), ()> {
1965     /// let url = Url::from_file_path("/tmp/foo.txt")?;
1966     /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
1967     ///
1968     /// let url = Url::from_file_path("../foo.txt");
1969     /// assert!(url.is_err());
1970     ///
1971     /// let url = Url::from_file_path("https://google.com/");
1972     /// assert!(url.is_err());
1973     /// # Ok(())
1974     /// # }
1975     /// # run().unwrap();
1976     /// # }
1977     /// ```
1978     #[cfg(any(unix, windows, target_os="redox"))]
from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>1979     pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
1980         let mut serialization = "file://".to_owned();
1981         let host_start = serialization.len() as u32;
1982         let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
1983         Ok(Url {
1984             serialization: serialization,
1985             scheme_end: "file".len() as u32,
1986             username_end: host_start,
1987             host_start: host_start,
1988             host_end: host_end,
1989             host: host,
1990             port: None,
1991             path_start: host_end,
1992             query_start: None,
1993             fragment_start: None,
1994         })
1995     }
1996 
1997     /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
1998     ///
1999     /// This returns `Err` if the given path is not absolute or,
2000     /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2001     ///
2002     /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash
2003     /// so that the entire path is considered when using this URL as a base URL.
2004     ///
2005     /// For example:
2006     ///
2007     /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2008     ///   as the base URL is `file:///var/www/index.html`
2009     /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2010     ///   as the base URL is `file:///var/index.html`, which might not be what was intended.
2011     ///
2012     /// Note that `std::path` does not consider trailing slashes significant
2013     /// and usually does not include them (e.g. in `Path::parent()`).
2014     #[cfg(any(unix, windows, target_os="redox"))]
from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()>2015     pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
2016         let mut url = Url::from_file_path(path)?;
2017         if !url.serialization.ends_with('/') {
2018             url.serialization.push('/')
2019         }
2020         Ok(url)
2021     }
2022 
2023     /// Serialize with Serde using the internal representation of the `Url` struct.
2024     ///
2025     /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2026     /// for speed, compared to the `Deserialize` trait impl.
2027     ///
2028     /// This method is only available if the `serde` Cargo feature is enabled.
2029     #[cfg(feature = "serde")]
2030     #[deny(unused)]
serialize_internal<S>(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer2031     pub fn serialize_internal<S>(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer {
2032         use serde::Serialize;
2033         // Destructuring first lets us ensure that adding or removing fields forces this method
2034         // to be updated
2035         let Url { ref serialization, ref scheme_end,
2036                   ref username_end, ref host_start,
2037                   ref host_end, ref host, ref port,
2038                   ref path_start, ref query_start,
2039                   ref fragment_start} = *self;
2040         (serialization, scheme_end, username_end,
2041          host_start, host_end, host, port, path_start,
2042          query_start, fragment_start).serialize(serializer)
2043     }
2044 
    /// Deserialize with Serde using the internal representation of the `Url` struct.
    ///
    /// This is the counterpart of `serialize_internal` and reads back the tuple of fields
    /// that method writes. It sacrifices some invariant-checking for speed, compared to the
    /// `Deserialize` trait impl: `check_invariants` is only run when `debug_assertions`
    /// are enabled.
    ///
    /// This method is only available if the `serde` Cargo feature is enabled.
    #[cfg(feature = "serde")]
    #[deny(unused)]
    pub fn deserialize_internal<D>(deserializer: &mut D) -> Result<Self, D::Error> where D: serde::Deserializer {
        use serde::{Deserialize, Error};
        // The field order must match the tuple written by `serialize_internal`.
        let (serialization, scheme_end, username_end,
             host_start, host_end, host, port, path_start,
             query_start, fragment_start) = Deserialize::deserialize(deserializer)?;
        let url = Url {
            serialization: serialization,
            scheme_end: scheme_end,
            username_end: username_end,
            host_start: host_start,
            host_end: host_end,
            host: host,
            port: port,
            path_start: path_start,
            query_start: query_start,
            fragment_start: fragment_start
        };
        // Only debug builds validate the reconstructed value.
        if cfg!(debug_assertions) {
            url.check_invariants().map_err(|ref reason| Error::invalid_value(&reason))?
        }
        Ok(url)
    }
2075 
2076 
2077     /// Assuming the URL is in the `file` scheme or similar,
2078     /// convert its path to an absolute `std::path::Path`.
2079     ///
2080     /// **Note:** This does not actually check the URL’s `scheme`,
2081     /// and may give nonsensical results for other schemes.
2082     /// It is the user’s responsibility to check the URL’s scheme before calling this.
2083     ///
2084     /// ```
2085     /// # use url::Url;
2086     /// # let url = Url::parse("file:///etc/passwd").unwrap();
2087     /// let path = url.to_file_path();
2088     /// ```
2089     ///
2090     /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2091     /// `file:` URLs may have a non-local host),
2092     /// or if `Path::new_opt()` returns `None`.
2093     /// (That is, if the percent-decoded path contains a NUL byte or,
2094     /// for a Windows path, is not UTF-8.)
2095     #[inline]
2096     #[cfg(any(unix, windows, target_os="redox"))]
to_file_path(&self) -> Result<PathBuf, ()>2097     pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2098         if let Some(segments) = self.path_segments() {
2099             let host = match self.host() {
2100                 None | Some(Host::Domain("localhost")) => None,
2101                 Some(_) if cfg!(windows) && self.scheme() == "file" => {
2102                     Some(&self.serialization[self.host_start as usize .. self.host_end as usize])
2103                 },
2104                 _ => return Err(())
2105             };
2106 
2107             return file_url_segments_to_pathbuf(host, segments);
2108         }
2109         Err(())
2110     }
2111 
2112     // Private helper methods:
2113 
2114     #[inline]
slice<R>(&self, range: R) -> &str where R: RangeArg2115     fn slice<R>(&self, range: R) -> &str where R: RangeArg {
2116         range.slice_of(&self.serialization)
2117     }
2118 
2119     #[inline]
byte_at(&self, i: u32) -> u82120     fn byte_at(&self, i: u32) -> u8 {
2121         self.serialization.as_bytes()[i as usize]
2122     }
2123 }
2124 
2125 /// Return an error if `Url::host` or `Url::port_or_known_default` return `None`.
2126 impl ToSocketAddrs for Url {
2127     type Iter = SocketAddrs;
2128 
to_socket_addrs(&self) -> io::Result<Self::Iter>2129     fn to_socket_addrs(&self) -> io::Result<Self::Iter> {
2130         self.with_default_port(|_| Err(()))?.to_socket_addrs()
2131     }
2132 }
2133 
2134 /// Parse a string as an URL, without a base URL or encoding override.
2135 impl str::FromStr for Url {
2136     type Err = ParseError;
2137 
2138     #[inline]
from_str(input: &str) -> Result<Url, ::ParseError>2139     fn from_str(input: &str) -> Result<Url, ::ParseError> {
2140         Url::parse(input)
2141     }
2142 }
2143 
2144 /// Display the serialization of this URL.
2145 impl fmt::Display for Url {
2146     #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2147     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2148         fmt::Display::fmt(&self.serialization, formatter)
2149     }
2150 }
2151 
2152 /// Debug the serialization of this URL.
2153 impl fmt::Debug for Url {
2154     #[inline]
fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result2155     fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2156         fmt::Debug::fmt(&self.serialization, formatter)
2157     }
2158 }
2159 
/// URLs compare like their serialization.
///
/// Marker impl: the equality itself is provided by the `PartialEq` impl for `Url`.
impl Eq for Url {}
2162 
2163 /// URLs compare like their serialization.
2164 impl PartialEq for Url {
2165     #[inline]
eq(&self, other: &Self) -> bool2166     fn eq(&self, other: &Self) -> bool {
2167         self.serialization == other.serialization
2168     }
2169 }
2170 
2171 /// URLs compare like their serialization.
2172 impl Ord for Url {
2173     #[inline]
cmp(&self, other: &Self) -> cmp::Ordering2174     fn cmp(&self, other: &Self) -> cmp::Ordering {
2175         self.serialization.cmp(&other.serialization)
2176     }
2177 }
2178 
2179 /// URLs compare like their serialization.
2180 impl PartialOrd for Url {
2181     #[inline]
partial_cmp(&self, other: &Self) -> Option<cmp::Ordering>2182     fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2183         self.serialization.partial_cmp(&other.serialization)
2184     }
2185 }
2186 
2187 /// URLs hash like their serialization.
2188 impl hash::Hash for Url {
2189     #[inline]
hash<H>(&self, state: &mut H) where H: hash::Hasher2190     fn hash<H>(&self, state: &mut H) where H: hash::Hasher {
2191         hash::Hash::hash(&self.serialization, state)
2192     }
2193 }
2194 
2195 /// Return the serialization of this URL.
2196 impl AsRef<str> for Url {
2197     #[inline]
as_ref(&self) -> &str2198     fn as_ref(&self) -> &str {
2199         &self.serialization
2200     }
2201 }
2202 
/// A range of `u32` byte indices that can be used to slice a `&str`.
///
/// Slicing panics like ordinary `str` indexing when the range is out of bounds
/// or does not fall on character boundaries.
trait RangeArg {
    /// Return the sub-slice of `s` designated by this range.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2227 
/// Encodes this URL as its string serialization.
#[cfg(feature="rustc-serialize")]
impl rustc_serialize::Encodable for Url {
    fn encode<S: rustc_serialize::Encoder>(&self, encoder: &mut S) -> Result<(), S::Error> {
        let text = self.as_str();
        encoder.emit_str(text)
    }
}
2234 
2235 
/// Decodes a URL by reading a string and parsing it with `Url::parse`.
#[cfg(feature="rustc-serialize")]
impl rustc_serialize::Decodable for Url {
    fn decode<D: rustc_serialize::Decoder>(decoder: &mut D) -> Result<Url, D::Error> {
        let text = decoder.read_str()?;
        match Url::parse(&*text) {
            Ok(url) => Ok(url),
            // Report parse failures through the decoder's own error type.
            Err(error) => Err(decoder.error(&format!("URL parsing error: {}", error))),
        }
    }
}
2244 
/// Serializes this URL into a `serde` stream, as its string serialization.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature="serde")]
impl serde::Serialize for Url {
    fn serialize<S>(&self, serializer: &mut S) -> Result<(), S::Error> where S: serde::Serializer {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2254 
/// Deserializes this URL from a `serde` stream, by reading a string and parsing it.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature="serde")]
impl serde::Deserialize for Url {
    fn deserialize<D>(deserializer: &mut D) -> Result<Url, D::Error> where D: serde::Deserializer {
        let text: String = serde::Deserialize::deserialize(deserializer)?;
        match Url::parse(&text) {
            Ok(url) => Ok(url),
            // Surface the parse error's description as an invalid-value error.
            Err(err) => Err(serde::Error::invalid_value(err.description())),
        }
    }
}
2267 
2268 #[cfg(any(unix, target_os = "redox"))]
path_to_file_url_segments(path: &Path, serialization: &mut String) -> Result<(u32, HostInternal), ()>2269 fn path_to_file_url_segments(path: &Path, serialization: &mut String)
2270                              -> Result<(u32, HostInternal), ()> {
2271     use std::os::unix::prelude::OsStrExt;
2272     if !path.is_absolute() {
2273         return Err(())
2274     }
2275     let host_end = to_u32(serialization.len()).unwrap();
2276     let mut empty = true;
2277     // skip the root component
2278     for component in path.components().skip(1) {
2279         empty = false;
2280         serialization.push('/');
2281         serialization.extend(percent_encode(
2282             component.as_os_str().as_bytes(), PATH_SEGMENT_ENCODE_SET));
2283     }
2284     if empty {
2285         // An URL’s path must not be empty.
2286         serialization.push('/');
2287     }
2288     Ok((host_end, HostInternal::None))
2289 }
2290 
/// Windows flavor of `path_to_file_url_segments`:
/// delegates to `path_to_file_url_segments_windows`.
#[cfg(windows)]
fn path_to_file_url_segments(path: &Path, serialization: &mut String)
                             -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2296 
2297 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2298 #[cfg_attr(not(windows), allow(dead_code))]
path_to_file_url_segments_windows(path: &Path, serialization: &mut String) -> Result<(u32, HostInternal), ()>2299 fn path_to_file_url_segments_windows(path: &Path, serialization: &mut String)
2300                                      -> Result<(u32, HostInternal), ()> {
2301     use std::path::{Prefix, Component};
2302     if !path.is_absolute() {
2303         return Err(())
2304     }
2305     let mut components = path.components();
2306 
2307     let host_end;
2308     let host_internal;
2309     match components.next() {
2310         Some(Component::Prefix(ref p)) => match p.kind() {
2311             Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
2312                 host_end = to_u32(serialization.len()).unwrap();
2313                 host_internal = HostInternal::None;
2314                 serialization.push('/');
2315                 serialization.push(letter as char);
2316                 serialization.push(':');
2317             },
2318             Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
2319                 let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
2320                 write!(serialization, "{}", host).unwrap();
2321                 host_end = to_u32(serialization.len()).unwrap();
2322                 host_internal = host.into();
2323                 serialization.push('/');
2324                 let share = share.to_str().ok_or(())?;
2325                 serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT_ENCODE_SET));
2326             },
2327             _ => return Err(())
2328         },
2329 
2330         _ => return Err(())
2331     }
2332 
2333     for component in components {
2334         if component == Component::RootDir { continue }
2335         // FIXME: somehow work with non-unicode?
2336         let component = component.as_os_str().to_str().ok_or(())?;
2337         serialization.push('/');
2338         serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT_ENCODE_SET));
2339     }
2340     Ok((host_end, host_internal))
2341 }
2342 
2343 
2344 #[cfg(any(unix, target_os = "redox"))]
file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split<char>) -> Result<PathBuf, ()>2345 fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split<char>) -> Result<PathBuf, ()> {
2346     use std::ffi::OsStr;
2347     use std::os::unix::prelude::OsStrExt;
2348     use std::path::PathBuf;
2349 
2350     if host.is_some() {
2351         return Err(());
2352     }
2353 
2354     let mut bytes = Vec::new();
2355     for segment in segments {
2356         bytes.push(b'/');
2357         bytes.extend(percent_decode(segment.as_bytes()));
2358     }
2359     let os_str = OsStr::from_bytes(&bytes);
2360     let path = PathBuf::from(os_str);
2361     debug_assert!(path.is_absolute(),
2362                   "to_file_path() failed to produce an absolute Path");
2363     Ok(path)
2364 }
2365 
/// Windows flavor of `file_url_segments_to_pathbuf`:
/// delegates to `file_url_segments_to_pathbuf_windows`.
#[cfg(windows)]
fn file_url_segments_to_pathbuf(host: Option<&str>, segments: str::Split<char>) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
2370 
2371 // Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
2372 #[cfg_attr(not(windows), allow(dead_code))]
file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::Split<char>) -> Result<PathBuf, ()>2373 fn file_url_segments_to_pathbuf_windows(host: Option<&str>, mut segments: str::Split<char>) -> Result<PathBuf, ()> {
2374 
2375     let mut string = if let Some(host) = host {
2376         r"\\".to_owned() + host
2377     } else {
2378         let first = segments.next().ok_or(())?;
2379 
2380         match first.len() {
2381             2 => {
2382                 if !first.starts_with(parser::ascii_alpha) || first.as_bytes()[1] != b':' {
2383                     return Err(())
2384                 }
2385 
2386                 first.to_owned()
2387             },
2388 
2389             4 => {
2390                 if !first.starts_with(parser::ascii_alpha) {
2391                     return Err(())
2392                 }
2393                 let bytes = first.as_bytes();
2394                 if bytes[1] != b'%' || bytes[2] != b'3' || (bytes[3] != b'a' && bytes[3] != b'A') {
2395                     return Err(())
2396                 }
2397 
2398                 first[0..1].to_owned() + ":"
2399             },
2400 
2401             _ => return Err(()),
2402         }
2403     };
2404 
2405     for segment in segments {
2406         string.push('\\');
2407 
2408         // Currently non-unicode windows paths cannot be represented
2409         match String::from_utf8(percent_decode(segment.as_bytes()).collect()) {
2410             Ok(s) => string.push_str(&s),
2411             Err(..) => return Err(()),
2412         }
2413     }
2414     let path = PathBuf::from(string);
2415     debug_assert!(path.is_absolute(),
2416                   "to_file_path() failed to produce an absolute Path");
2417     Ok(path)
2418 }
2419 
/// Builds an `Err` carrying an `io::Error` of kind `InvalidData`
/// whose message is `reason`.
fn io_error<T>(reason: &str) -> io::Result<T> {
    let error = io::Error::new(io::ErrorKind::InvalidData, reason);
    Err(error)
}
2423 
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
///
/// Holds a mutable borrow of a `Url` together with an optional fragment string.
/// When the value is dropped, the fragment is passed to
/// `Url::restore_already_parsed_fragment` on the borrowed URL.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // Wrapped in `Option` so the `Drop` impl can move the borrow out with `take()`.
    url: Option<&'a mut Url>,
    // Handed to `restore_already_parsed_fragment` on drop
    // (presumably the fragment set aside while the query is edited; confirm at the construction site).
    fragment: Option<String>,
}
2430 
2431 impl<'a> Drop for UrlQuery<'a> {
drop(&mut self)2432     fn drop(&mut self) {
2433         if let Some(url) = self.url.take() {
2434             url.restore_already_parsed_fragment(self.fragment.take())
2435         }
2436     }
2437 }
2438 
2439 
/// Define a new struct
/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait,
/// for use in [`percent_encode()`](percent_encoding/fn.percent_encode.html)
/// and related functions.
///
/// Parameters are characters to include in the set in addition to those of the base set.
/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
///
/// The generated `contains` method converts each byte with `byte as char` and
/// matches it against the listed patterns; a byte that matches none of them
/// is looked up in the base set.
///
/// Example
/// =======
///
/// ```rust
/// #[macro_use] extern crate url;
/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
/// define_encode_set! {
///     /// This encode set is used in the URL parser for query strings.
///     pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
/// }
/// # fn main() {
/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(), "foo%20bar");
/// # }
/// ```
#[macro_export]
macro_rules! define_encode_set {
    // Input shape: optional attributes (including doc comments), then
    // `pub NAME = [BASE_SET] | {pattern, pattern, ...}`.
    ($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => {
        // Attributes written by the caller are re-emitted on the generated struct.
        $(#[$attr])*
        #[derive(Copy, Clone)]
        // The struct takes the caller's identifier verbatim (e.g. `QUERY_ENCODE_SET`),
        // which is not CamelCase, hence the lint exemption.
        #[allow(non_camel_case_types)]
        pub struct $name;

        impl $crate::percent_encoding::EncodeSet for $name {
            #[inline]
            fn contains(&self, byte: u8) -> bool {
                // The patterns are matched against a `char`, so the byte is cast first.
                match byte as char {
                    $(
                        $ch => true,
                    )*
                    // Not one of the extra characters: defer to the base set.
                    _ => $base_set.contains(byte)
                }
            }
        }
    }
}
2483