1 #![allow(dead_code)]
2 
3 use std::borrow::Cow;
4 use std::ops::{Index, Range};
5 use std::fmt::{self, Debug};
6 
7 use pear::{Input, Length};
8 
9 use crate::ext::IntoOwned;
10 
/// An [`Indexed`] string whose concrete variant, if any, borrows for `'static`.
pub type IndexedString = Indexed<'static, str>;
/// An [`Indexed`] string whose concrete variant may borrow for `'a`.
pub type IndexedStr<'a> = Indexed<'a, str>;
/// An [`Indexed`] byte slice whose concrete variant may borrow for `'a`.
pub type IndexedBytes<'a> = Indexed<'a, [u8]>;
14 
/// Types that can expose the address of their first byte.
///
/// The address is used by [`Indexed`] to work out where one value sits inside
/// another by comparing pointers.
pub trait AsPtr {
    /// Returns a raw pointer to the first byte of `self`.
    fn as_ptr(&self) -> *const u8;
}

impl AsPtr for str {
    /// Pointer to the first byte of the string's UTF-8 data.
    #[inline(always)]
    fn as_ptr(&self) -> *const u8 {
        self.as_bytes().as_ptr()
    }
}

impl AsPtr for [u8] {
    /// Pointer to the first byte of the slice.
    #[inline(always)]
    fn as_ptr(&self) -> *const u8 {
        // Dropping the length metadata from the fat pointer leaves the data
        // address, which is what the inherent `as_ptr` returns.
        self as *const [u8] as *const u8
    }
}
32 
/// A value that is either a pair of offsets into some external source or a
/// concrete, self-contained value.
#[derive(PartialEq)]
pub enum Indexed<'a, T: ?Sized + ToOwned> {
    /// A `(start, end)` pair of offsets into a source that is stored elsewhere.
    Indexed(usize, usize),
    /// A borrowed or owned value that needs no source to be resolved.
    Concrete(Cow<'a, T>),
}
38 
39 impl<'a, T: ?Sized + ToOwned + 'a, C: Into<Cow<'a, T>>> From<C> for Indexed<'a, T> {
40     #[inline(always)]
from(value: C) -> Indexed<'a, T>41     fn from(value: C) -> Indexed<'a, T> {
42         Indexed::Concrete(value.into())
43     }
44 }
45 
46 impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T> {
47     /// Panics if `self` is not an `Indexed`.
48     #[inline(always)]
indices(self) -> (usize, usize)49     pub fn indices(self) -> (usize, usize) {
50         match self {
51             Indexed::Indexed(a, b) => (a, b),
52             _ => panic!("cannot convert indexed T to U unless indexed")
53         }
54     }
55 
56     /// Panics if `self` is not an `Indexed`.
57     #[inline(always)]
coerce<U: ?Sized + ToOwned>(self) -> Indexed<'a, U>58     pub fn coerce<U: ?Sized + ToOwned>(self) -> Indexed<'a, U> {
59         match self {
60             Indexed::Indexed(a, b) => Indexed::Indexed(a, b),
61             _ => panic!("cannot convert indexed T to U unless indexed")
62         }
63     }
64 
65     /// Panics if `self` is not an `Indexed`.
66     #[inline(always)]
coerce_lifetime<'b>(self) -> Indexed<'b, T>67     pub fn coerce_lifetime<'b>(self) -> Indexed<'b, T> {
68         match self {
69             Indexed::Indexed(a, b) => Indexed::Indexed(a, b),
70             _ => panic!("cannot coerce lifetime unless indexed")
71         }
72     }
73 }
74 
75 impl<T: 'static + ?Sized + ToOwned> IntoOwned for Indexed<'_, T> {
76     type Owned = Indexed<'static, T>;
77 
into_owned(self) -> Indexed<'static, T>78     fn into_owned(self) -> Indexed<'static, T> {
79         match self {
80             Indexed::Indexed(a, b) => Indexed::Indexed(a, b),
81             Indexed::Concrete(cow) => Indexed::Concrete(IntoOwned::into_owned(cow))
82         }
83     }
84 }
85 
86 use std::ops::Add;
87 
88 impl<'a, T: ?Sized + ToOwned + 'a> Add for Indexed<'a, T> {
89     type Output = Indexed<'a, T>;
90 
91     #[inline]
add(self, other: Indexed<'a, T>) -> Indexed<'a, T>92     fn add(self, other: Indexed<'a, T>) -> Indexed<'a, T> {
93         match self {
94             Indexed::Indexed(a, b) => match other {
95                 Indexed::Indexed(c, d) if b == c && a < d => Indexed::Indexed(a, d),
96                 _ => panic!("+ requires indexed")
97             }
98             _ => panic!("+ requires indexed")
99         }
100     }
101 }
102 
103 impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T>
104     where T: Length + AsPtr + Index<Range<usize>, Output = T>
105 {
106     // Returns `None` if `needle` is not a substring of `haystack`.
checked_from(needle: &T, haystack: &T) -> Option<Indexed<'a, T>>107     pub fn checked_from(needle: &T, haystack: &T) -> Option<Indexed<'a, T>> {
108         let haystack_start = haystack.as_ptr() as usize;
109         let needle_start = needle.as_ptr() as usize;
110 
111         if needle_start < haystack_start {
112             return None;
113         }
114 
115         if (needle_start + needle.len()) > (haystack_start + haystack.len()) {
116             return None;
117         }
118 
119         let start = needle_start - haystack_start;
120         let end = start + needle.len();
121         Some(Indexed::Indexed(start, end))
122     }
123 
124     // Caller must ensure that `needle` is a substring of `haystack`.
unchecked_from(needle: &T, haystack: &T) -> Indexed<'a, T>125     pub unsafe fn unchecked_from(needle: &T, haystack: &T) -> Indexed<'a, T> {
126         let haystack_start = haystack.as_ptr() as usize;
127         let needle_start = needle.as_ptr() as usize;
128 
129         let start = needle_start - haystack_start;
130         let end = start + needle.len();
131         Indexed::Indexed(start, end)
132     }
133 
134     /// Whether this string is derived from indexes or not.
135     #[inline]
is_indexed(&self) -> bool136     pub fn is_indexed(&self) -> bool {
137         match *self {
138             Indexed::Indexed(..) => true,
139             Indexed::Concrete(..) => false,
140         }
141     }
142 
143     /// Whether this string is derived from indexes or not.
144     #[inline]
is_empty(&self) -> bool145     pub fn is_empty(&self) -> bool {
146         self.len() == 0
147     }
148 
149     /// Retrieves the string `self` corresponds to. If `self` is derived from
150     /// indexes, the corresponding subslice of `source` is returned. Otherwise,
151     /// the concrete string is returned.
152     ///
153     /// # Panics
154     ///
155     /// Panics if `self` is an indexed string and `string` is None.
156     // pub fn to_source(&self, source: Option<&'a T>) -> &T {
from_cow_source<'s>(&'s self, source: &'s Option<Cow<'_, T>>) -> &'s T157     pub fn from_cow_source<'s>(&'s self, source: &'s Option<Cow<'_, T>>) -> &'s T {
158         if self.is_indexed() && source.is_none() {
159             panic!("Cannot convert indexed str to str without base string!")
160         }
161 
162         match *self {
163             Indexed::Indexed(i, j) => &source.as_ref().unwrap()[i..j],
164             Indexed::Concrete(ref mstr) => mstr.as_ref(),
165         }
166     }
167 
168     /// Retrieves the string `self` corresponds to. If `self` is derived from
169     /// indexes, the corresponding subslice of `string` is returned. Otherwise,
170     /// the concrete string is returned.
171     ///
172     /// # Panics
173     ///
174     /// Panics if `self` is an indexed string and `string` is None.
from_source<'s>(&'s self, source: Option<&'s T>) -> &'s T175     pub fn from_source<'s>(&'s self, source: Option<&'s T>) -> &'s T {
176         if self.is_indexed() && source.is_none() {
177             panic!("Cannot convert indexed str to str without base string!")
178         }
179 
180         match *self {
181             Indexed::Indexed(i, j) => &source.unwrap()[(i as usize)..(j as usize)],
182             Indexed::Concrete(ref mstr) => &*mstr,
183         }
184     }
185 }
186 
187 impl<'a, T: ToOwned + ?Sized + 'a> Clone for Indexed<'a, T> {
clone(&self) -> Self188     fn clone(&self) -> Self {
189         match *self {
190             Indexed::Indexed(a, b) => Indexed::Indexed(a, b),
191             Indexed::Concrete(ref cow) => Indexed::Concrete(cow.clone())
192         }
193     }
194 }
195 
196 impl<'a, T: ?Sized + 'a> Debug for Indexed<'a, T>
197     where T: ToOwned + Debug, T::Owned: Debug
198 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result199     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
200         match *self {
201             Indexed::Indexed(a, b) => fmt::Debug::fmt(&(a, b), f),
202             Indexed::Concrete(ref cow) => fmt::Debug::fmt(cow, f),
203         }
204     }
205 }
206 
207 impl<'a, T: ?Sized + Length + ToOwned + 'a> Length for Indexed<'a, T> {
208     #[inline(always)]
len(&self) -> usize209     fn len(&self) -> usize {
210         match *self {
211             Indexed::Indexed(a, b) => (b - a) as usize,
212             Indexed::Concrete(ref cow) => cow.len()
213         }
214     }
215 }
216 
/// A parser input that remembers the complete `source` alongside the
/// not-yet-consumed `current` part of it.
#[derive(Debug)]
pub struct IndexedInput<'a, T: ?Sized> {
    /// The complete input.
    source: &'a T,
    /// The part of the input that remains to be consumed.
    current: &'a T,
}

impl<'a, T: ?Sized + 'a> IndexedInput<'a, T> {
    /// Returns the complete input this value was created from.
    #[inline(always)]
    pub fn source(&self) -> &T {
        let IndexedInput { source, .. } = self;
        *source
    }
}

impl<'a, T: ToOwned + ?Sized + 'a> IndexedInput<'a, T> {
    /// Returns the complete input as a borrowed `Cow`.
    #[inline(always)]
    pub fn cow_source(&self) -> Cow<'a, T> {
        let whole: &'a T = self.source;
        Cow::Borrowed(whole)
    }
}
236 
237 impl IndexedInput<'_, [u8]> {
backtrack(&mut self, n: usize) -> pear::Result<(), Self>238     pub fn backtrack(&mut self, n: usize) -> pear::Result<(), Self> {
239         let source_addr = self.source.as_ptr() as usize;
240         let current_addr = self.current.as_ptr() as usize;
241         if current_addr > n && (current_addr - n) >= source_addr {
242             let forward = (current_addr - n) - source_addr;
243             self.current = &self.source[forward..];
244             Ok(())
245         } else {
246             let diag = format!("({}, {:x} in {:x})", n, current_addr, source_addr);
247             Err(pear_error!([backtrack; self] "internal error: {}", diag))
248         }
249     }
250 
len(&self) -> usize251     pub fn len(&self) -> usize {
252         self.source.len()
253     }
254 }
255 
/// Implements `From<&$T>` and `pear::Input` for `IndexedInput<'_, $T>`.
///
/// Parsing is delegated to the `Input` impl of `&$T` through the `current`
/// field. Slices that come back are converted into offsets relative to the
/// `source` field, so the resulting `Indexed` values borrow nothing.
macro_rules! impl_indexed_input {
    ($T:ty, token = $token:ty) => (
        impl<'a> From<&'a $T> for IndexedInput<'a, $T> {
            /// Starts with nothing consumed: `current` is the whole `source`.
            #[inline(always)]
            fn from(source: &'a $T) -> Self {
                IndexedInput { source, current: source }
            }
        }

        impl<'a> Input for IndexedInput<'a, $T> {
            type Token = $token;
            type InSlice = &'a $T;
            type Slice = Indexed<'static, $T>;
            type Many = Indexed<'static, $T>;
            type Context = Context;

            #[inline(always)]
            fn peek(&mut self) -> Option<Self::Token> {
                self.current.peek()
            }

            #[inline(always)]
            fn peek_slice(&mut self, slice: Self::InSlice) -> Option<Self::Slice> {
                let source = self.source;
                let found = self.current.peek_slice(slice)?;
                // SAFETY: presumably `found` is a subslice of `self.current`,
                // which is itself part of `source` — TODO confirm against the
                // `pear` `Input` impl for `&$T`.
                Some(unsafe { Indexed::unchecked_from(found, source) })
            }

            #[inline(always)]
            fn skip_many<F>(&mut self, cond: F) -> usize
                where F: FnMut(Self::Token) -> bool
            {
                self.current.skip_many(cond)
            }

            #[inline(always)]
            fn take_many<F>(&mut self, cond: F) -> Self::Many
                where F: FnMut(Self::Token) -> bool
            {
                let taken = self.current.take_many(cond);
                // SAFETY: presumably `taken` is a subslice of `self.current`,
                // which is itself part of `self.source` — TODO confirm against
                // the `pear` `Input` impl for `&$T`.
                unsafe { Indexed::unchecked_from(taken, self.source) }
            }

            #[inline(always)]
            fn advance(&mut self, count: usize) {
                self.current.advance(count)
            }

            #[inline(always)]
            fn is_empty(&mut self) -> bool {
                self.current.is_empty()
            }

            /// Reports how much has been consumed together with the remaining
            /// input, or `None` if the remaining input is not valid UTF-8.
            fn context(&mut self) -> Option<Context> {
                let consumed = self.source.len() - self.current.len();
                let rest: &[u8] = self.current.as_ref();
                String::from_utf8(rest.to_vec())
                    .ok()
                    .map(|string| Context { offset: consumed, string })
            }
        }
    )
}
319 
/// Describes a position in an input: an offset plus the text found there.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct Context {
    /// The offset into the input this context refers to.
    pub offset: usize,
    /// The text associated with this context.
    pub string: String,
}

/// Formats as `[offset:]`, followed by a space and the text when it is
/// non-empty. Text longer than 7 bytes is cut to at most 7 bytes and suffixed
/// with `..`.
impl std::fmt::Display for Context {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const LIMIT: usize = 7;
        write!(f, "[{}:]", self.offset)?;

        if self.string.len() > LIMIT {
            // Slicing at a fixed byte offset panics when that offset falls
            // inside a multi-byte character, so back up to the nearest char
            // boundary. Index 0 is always a boundary, so the search succeeds.
            let end = (0..=LIMIT)
                .rev()
                .find(|&i| self.string.is_char_boundary(i))
                .unwrap_or(0);

            write!(f, " {}..", &self.string[..end])
        } else if !self.string.is_empty() {
            write!(f, " {}", &self.string)
        } else {
            Ok(())
        }
    }
}
340 
// Generate the `From` and `Input` impls for byte-slice inputs (tokens are
// `u8`) and for string inputs (tokens are `char`).
impl_indexed_input!([u8], token = u8);
impl_indexed_input!(str, token = char);
343