1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 use std::fmt;
11 use std::hash::{Hash, Hasher};
12 use std::ptr;
13 use std::str;
14 
15 use position;
16 
/// A span over a `&str`. It is created from either [two `Position`s] or from a [`Pair`].
///
/// A `Span` keeps a reference to the complete input together with a `start..end` byte range
/// into it; the text it covers is `input[start..end]`.
///
/// [two `Position`s]: struct.Position.html#method.span
/// [`Pair`]: ../iterators/struct.Pair.html#method.span
#[derive(Clone)]
pub struct Span<'i> {
    /// The complete input string that `start` and `end` index into (not just the covered part).
    input: &'i str,
    /// Byte offset at which the span begins.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    start: usize,
    /// Byte offset at which the span stops; used as the exclusive upper bound of the slice.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    end: usize,
}
33 
34 impl<'i> Span<'i> {
35     /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.)
36     ///
37     /// # Safety
38     ///
39     /// `input[start..end]` must be a valid subslice; that is, said indexing should not panic.
new_unchecked(input: &str, start: usize, end: usize) -> Span40     pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span {
41         debug_assert!(input.get(start..end).is_some());
42         Span { input, start, end }
43     }
44 
45     /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index
46     /// into `input`.
47     ///
48     /// # Examples
49     ///
50     /// ```
51     /// # use pest::Span;
52     /// let input = "Hello!";
53     /// assert_eq!(None, Span::new(input, 100, 0));
54     /// assert!(Span::new(input, 0, input.len()).is_some());
55     /// ```
56     #[allow(clippy::new_ret_no_self)]
new(input: &str, start: usize, end: usize) -> Option<Span>57     pub fn new(input: &str, start: usize, end: usize) -> Option<Span> {
58         if input.get(start..end).is_some() {
59             Some(Span { input, start, end })
60         } else {
61             None
62         }
63     }
64 
65     /// Returns the `Span`'s start byte position as a `usize`.
66     ///
67     /// # Examples
68     ///
69     /// ```
70     /// # use pest::Position;
71     /// let input = "ab";
72     /// let start = Position::from_start(input);
73     /// let end = start.clone();
74     /// let span = start.span(&end);
75     ///
76     /// assert_eq!(span.start(), 0);
77     /// ```
78     #[inline]
start(&self) -> usize79     pub fn start(&self) -> usize {
80         self.start
81     }
82 
83     /// Returns the `Span`'s end byte position as a `usize`.
84     ///
85     /// # Examples
86     ///
87     /// ```
88     /// # use pest::Position;
89     /// let input = "ab";
90     /// let start = Position::from_start(input);
91     /// let end = start.clone();
92     /// let span = start.span(&end);
93     ///
94     /// assert_eq!(span.end(), 0);
95     /// ```
96     #[inline]
end(&self) -> usize97     pub fn end(&self) -> usize {
98         self.end
99     }
100 
101     /// Returns the `Span`'s start `Position`.
102     ///
103     /// # Examples
104     ///
105     /// ```
106     /// # use pest::Position;
107     /// let input = "ab";
108     /// let start = Position::from_start(input);
109     /// let end = start.clone();
110     /// let span = start.clone().span(&end);
111     ///
112     /// assert_eq!(span.start_pos(), start);
113     /// ```
114     #[inline]
start_pos(&self) -> position::Position<'i>115     pub fn start_pos(&self) -> position::Position<'i> {
116         // Span's start position is always a UTF-8 border.
117         unsafe { position::Position::new_unchecked(self.input, self.start) }
118     }
119 
120     /// Returns the `Span`'s end `Position`.
121     ///
122     /// # Examples
123     ///
124     /// ```
125     /// # use pest::Position;
126     /// let input = "ab";
127     /// let start = Position::from_start(input);
128     /// let end = start.clone();
129     /// let span = start.span(&end);
130     ///
131     /// assert_eq!(span.end_pos(), end);
132     /// ```
133     #[inline]
end_pos(&self) -> position::Position<'i>134     pub fn end_pos(&self) -> position::Position<'i> {
135         // Span's end position is always a UTF-8 border.
136         unsafe { position::Position::new_unchecked(self.input, self.end) }
137     }
138 
139     /// Splits the `Span` into a pair of `Position`s.
140     ///
141     /// # Examples
142     ///
143     /// ```
144     /// # use pest::Position;
145     /// let input = "ab";
146     /// let start = Position::from_start(input);
147     /// let end = start.clone();
148     /// let span = start.clone().span(&end);
149     ///
150     /// assert_eq!(span.split(), (start, end));
151     /// ```
152     #[inline]
split(self) -> (position::Position<'i>, position::Position<'i>)153     pub fn split(self) -> (position::Position<'i>, position::Position<'i>) {
154         // Span's start and end positions are always a UTF-8 borders.
155         let pos1 = unsafe { position::Position::new_unchecked(self.input, self.start) };
156         let pos2 = unsafe { position::Position::new_unchecked(self.input, self.end) };
157 
158         (pos1, pos2)
159     }
160 
161     /// Captures a slice from the `&str` defined by the `Span`.
162     ///
163     /// # Examples
164     ///
165     /// ```
166     /// # use pest;
167     /// # #[allow(non_camel_case_types)]
168     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
169     /// enum Rule {}
170     ///
171     /// let input = "abc";
172     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input).skip(1).unwrap();
173     /// let start_pos = state.position().clone();
174     /// state = state.match_string("b").unwrap();
175     /// let span = start_pos.span(&state.position().clone());
176     /// assert_eq!(span.as_str(), "b");
177     /// ```
178     #[inline]
as_str(&self) -> &'i str179     pub fn as_str(&self) -> &'i str {
180         // Span's start and end positions are always a UTF-8 borders.
181         &self.input[self.start..self.end]
182     }
183 
184     /// Iterates over all lines (partially) covered by this span.
185     ///
186     /// # Examples
187     ///
188     /// ```
189     /// # use pest;
190     /// # #[allow(non_camel_case_types)]
191     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
192     /// enum Rule {}
193     ///
194     /// let input = "a\nb\nc";
195     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input).skip(2).unwrap();
196     /// let start_pos = state.position().clone();
197     /// state = state.match_string("b\nc").unwrap();
198     /// let span = start_pos.span(&state.position().clone());
199     /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b\n", "c"]);
200     /// ```
201     #[inline]
lines(&self) -> Lines202     pub fn lines(&self) -> Lines {
203         Lines {
204             span: self,
205             pos: self.start,
206         }
207     }
208 }
209 
210 impl<'i> fmt::Debug for Span<'i> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result211     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
212         f.debug_struct("Span")
213             .field("str", &self.as_str())
214             .field("start", &self.start)
215             .field("end", &self.end)
216             .finish()
217     }
218 }
219 
220 impl<'i> PartialEq for Span<'i> {
eq(&self, other: &Span<'i>) -> bool221     fn eq(&self, other: &Span<'i>) -> bool {
222         ptr::eq(self.input, other.input) && self.start == other.start && self.end == other.end
223     }
224 }
225 
// Marker impl: the `PartialEq` comparison (input pointer identity plus the two offsets) is a
// full equivalence relation, so `Span` can also be `Eq`.
impl<'i> Eq for Span<'i> {}
227 
228 impl<'i> Hash for Span<'i> {
hash<H: Hasher>(&self, state: &mut H)229     fn hash<H: Hasher>(&self, state: &mut H) {
230         (self.input as *const str).hash(state);
231         self.start.hash(state);
232         self.end.hash(state);
233     }
234 }
235 
/// Line iterator for Spans, created by [`Span::lines()`].
///
/// Iterates all lines that are at least partially covered by the span.
///
/// [`Span::lines()`]: struct.Span.html#method.lines
pub struct Lines<'i> {
    /// The span whose lines are being iterated; its `input` and `end` are read on every step.
    span: &'i Span<'i>,
    /// Byte offset into the span's input from which the next line is looked up. It starts at
    /// the span's `start` and is advanced after each yielded line.
    pos: usize,
}
245 
246 impl<'i> Iterator for Lines<'i> {
247     type Item = &'i str;
next(&mut self) -> Option<&'i str>248     fn next(&mut self) -> Option<&'i str> {
249         if self.pos > self.span.end {
250             return None;
251         }
252         let pos = position::Position::new(self.span.input, self.pos)?;
253         if pos.at_end() {
254             return None;
255         }
256         let line = pos.line_of();
257         self.pos = pos.find_line_end();
258         Some(line)
259     }
260 }
261 
#[cfg(test)]
mod tests {
    use super::*;

    /// Splitting a span gives back the two positions it was built from.
    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        let mut end = start.clone();

        // Move `end` one character forward so the span is non-empty.
        assert!(end.skip(1));

        let span = start.clone().span(&end);

        assert_eq!(span.split(), (start, end));
    }

    /// A span that starts and ends mid-line reports both touched lines in full.
    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines: Vec<_> = span.lines().collect();
        println!("{:?}", lines);
        assert_eq!(lines, vec!["abc\n", "def\n"]);
    }

    /// A span reaching the end of the input includes the final, unterminated line.
    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        let lines: Vec<_> = span.lines().collect();
        println!("{:?}", lines);
        assert_eq!(lines, vec!["def\n", "ghi"]);
    }
}
302