1 #![allow(dead_code)] 2 3 use std::borrow::Cow; 4 use std::ops::{Index, Range}; 5 use std::fmt::{self, Debug}; 6 7 use pear::{Input, Length}; 8 9 use crate::ext::IntoOwned; 10 11 pub type IndexedString = Indexed<'static, str>; 12 pub type IndexedStr<'a> = Indexed<'a, str>; 13 pub type IndexedBytes<'a> = Indexed<'a, [u8]>; 14 15 pub trait AsPtr { as_ptr(&self) -> *const u816 fn as_ptr(&self) -> *const u8; 17 } 18 19 impl AsPtr for str { 20 #[inline(always)] as_ptr(&self) -> *const u821 fn as_ptr(&self) -> *const u8 { 22 str::as_ptr(self) 23 } 24 } 25 26 impl AsPtr for [u8] { 27 #[inline(always)] as_ptr(&self) -> *const u828 fn as_ptr(&self) -> *const u8 { 29 <[u8]>::as_ptr(self) 30 } 31 } 32 33 #[derive(PartialEq)] 34 pub enum Indexed<'a, T: ?Sized + ToOwned> { 35 Indexed(usize, usize), 36 Concrete(Cow<'a, T>) 37 } 38 39 impl<'a, T: ?Sized + ToOwned + 'a, C: Into<Cow<'a, T>>> From<C> for Indexed<'a, T> { 40 #[inline(always)] from(value: C) -> Indexed<'a, T>41 fn from(value: C) -> Indexed<'a, T> { 42 Indexed::Concrete(value.into()) 43 } 44 } 45 46 impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T> { 47 /// Panics if `self` is not an `Indexed`. 48 #[inline(always)] indices(self) -> (usize, usize)49 pub fn indices(self) -> (usize, usize) { 50 match self { 51 Indexed::Indexed(a, b) => (a, b), 52 _ => panic!("cannot convert indexed T to U unless indexed") 53 } 54 } 55 56 /// Panics if `self` is not an `Indexed`. 57 #[inline(always)] coerce<U: ?Sized + ToOwned>(self) -> Indexed<'a, U>58 pub fn coerce<U: ?Sized + ToOwned>(self) -> Indexed<'a, U> { 59 match self { 60 Indexed::Indexed(a, b) => Indexed::Indexed(a, b), 61 _ => panic!("cannot convert indexed T to U unless indexed") 62 } 63 } 64 65 /// Panics if `self` is not an `Indexed`. 66 #[inline(always)] coerce_lifetime<'b>(self) -> Indexed<'b, T>67 pub fn coerce_lifetime<'b>(self) -> Indexed<'b, T> { 68 match self { 69 Indexed::Indexed(a, b) => Indexed::Indexed(a, b), 70 _ => panic!("cannot coerce lifetime unless indexed") 71 } 72 } 73 } 74 75 impl<T: 'static + ?Sized + ToOwned> IntoOwned for Indexed<'_, T> { 76 type Owned = Indexed<'static, T>; 77 into_owned(self) -> Indexed<'static, T>78 fn into_owned(self) -> Indexed<'static, T> { 79 match self { 80 Indexed::Indexed(a, b) => Indexed::Indexed(a, b), 81 Indexed::Concrete(cow) => Indexed::Concrete(IntoOwned::into_owned(cow)) 82 } 83 } 84 } 85 86 use std::ops::Add; 87 88 impl<'a, T: ?Sized + ToOwned + 'a> Add for Indexed<'a, T> { 89 type Output = Indexed<'a, T>; 90 91 #[inline] add(self, other: Indexed<'a, T>) -> Indexed<'a, T>92 fn add(self, other: Indexed<'a, T>) -> Indexed<'a, T> { 93 match self { 94 Indexed::Indexed(a, b) => match other { 95 Indexed::Indexed(c, d) if b == c && a < d => Indexed::Indexed(a, d), 96 _ => panic!("+ requires indexed") 97 } 98 _ => panic!("+ requires indexed") 99 } 100 } 101 } 102 103 impl<'a, T: ?Sized + ToOwned + 'a> Indexed<'a, T> 104 where T: Length + AsPtr + Index<Range<usize>, Output = T> 105 { 106 // Returns `None` if `needle` is not a substring of `haystack`. checked_from(needle: &T, haystack: &T) -> Option<Indexed<'a, T>>107 pub fn checked_from(needle: &T, haystack: &T) -> Option<Indexed<'a, T>> { 108 let haystack_start = haystack.as_ptr() as usize; 109 let needle_start = needle.as_ptr() as usize; 110 111 if needle_start < haystack_start { 112 return None; 113 } 114 115 if (needle_start + needle.len()) > (haystack_start + haystack.len()) { 116 return None; 117 } 118 119 let start = needle_start - haystack_start; 120 let end = start + needle.len(); 121 Some(Indexed::Indexed(start, end)) 122 } 123 124 // Caller must ensure that `needle` is a substring of `haystack`. unchecked_from(needle: &T, haystack: &T) -> Indexed<'a, T>125 pub unsafe fn unchecked_from(needle: &T, haystack: &T) -> Indexed<'a, T> { 126 let haystack_start = haystack.as_ptr() as usize; 127 let needle_start = needle.as_ptr() as usize; 128 129 let start = needle_start - haystack_start; 130 let end = start + needle.len(); 131 Indexed::Indexed(start, end) 132 } 133 134 /// Whether this string is derived from indexes or not. 135 #[inline] is_indexed(&self) -> bool136 pub fn is_indexed(&self) -> bool { 137 match *self { 138 Indexed::Indexed(..) => true, 139 Indexed::Concrete(..) => false, 140 } 141 } 142 143 /// Whether this string is derived from indexes or not. 144 #[inline] is_empty(&self) -> bool145 pub fn is_empty(&self) -> bool { 146 self.len() == 0 147 } 148 149 /// Retrieves the string `self` corresponds to. If `self` is derived from 150 /// indexes, the corresponding subslice of `source` is returned. Otherwise, 151 /// the concrete string is returned. 152 /// 153 /// # Panics 154 /// 155 /// Panics if `self` is an indexed string and `string` is None. 156 // pub fn to_source(&self, source: Option<&'a T>) -> &T { from_cow_source<'s>(&'s self, source: &'s Option<Cow<'_, T>>) -> &'s T157 pub fn from_cow_source<'s>(&'s self, source: &'s Option<Cow<'_, T>>) -> &'s T { 158 if self.is_indexed() && source.is_none() { 159 panic!("Cannot convert indexed str to str without base string!") 160 } 161 162 match *self { 163 Indexed::Indexed(i, j) => &source.as_ref().unwrap()[i..j], 164 Indexed::Concrete(ref mstr) => mstr.as_ref(), 165 } 166 } 167 168 /// Retrieves the string `self` corresponds to. If `self` is derived from 169 /// indexes, the corresponding subslice of `string` is returned. Otherwise, 170 /// the concrete string is returned. 171 /// 172 /// # Panics 173 /// 174 /// Panics if `self` is an indexed string and `string` is None. from_source<'s>(&'s self, source: Option<&'s T>) -> &'s T175 pub fn from_source<'s>(&'s self, source: Option<&'s T>) -> &'s T { 176 if self.is_indexed() && source.is_none() { 177 panic!("Cannot convert indexed str to str without base string!") 178 } 179 180 match *self { 181 Indexed::Indexed(i, j) => &source.unwrap()[(i as usize)..(j as usize)], 182 Indexed::Concrete(ref mstr) => &*mstr, 183 } 184 } 185 } 186 187 impl<'a, T: ToOwned + ?Sized + 'a> Clone for Indexed<'a, T> { clone(&self) -> Self188 fn clone(&self) -> Self { 189 match *self { 190 Indexed::Indexed(a, b) => Indexed::Indexed(a, b), 191 Indexed::Concrete(ref cow) => Indexed::Concrete(cow.clone()) 192 } 193 } 194 } 195 196 impl<'a, T: ?Sized + 'a> Debug for Indexed<'a, T> 197 where T: ToOwned + Debug, T::Owned: Debug 198 { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result199 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 200 match *self { 201 Indexed::Indexed(a, b) => fmt::Debug::fmt(&(a, b), f), 202 Indexed::Concrete(ref cow) => fmt::Debug::fmt(cow, f), 203 } 204 } 205 } 206 207 impl<'a, T: ?Sized + Length + ToOwned + 'a> Length for Indexed<'a, T> { 208 #[inline(always)] len(&self) -> usize209 fn len(&self) -> usize { 210 match *self { 211 Indexed::Indexed(a, b) => (b - a) as usize, 212 Indexed::Concrete(ref cow) => cow.len() 213 } 214 } 215 } 216 217 #[derive(Debug)] 218 pub struct IndexedInput<'a, T: ?Sized> { 219 source: &'a T, 220 current: &'a T 221 } 222 223 impl<'a, T: ?Sized + 'a> IndexedInput<'a, T> { 224 #[inline(always)] source(&self) -> &T225 pub fn source(&self) -> &T { 226 self.source 227 } 228 } 229 230 impl<'a, T: ToOwned + ?Sized + 'a> IndexedInput<'a, T> { 231 #[inline(always)] cow_source(&self) -> Cow<'a, T>232 pub fn cow_source(&self) -> Cow<'a, T> { 233 Cow::Borrowed(self.source) 234 } 235 } 236 237 impl IndexedInput<'_, [u8]> { backtrack(&mut self, n: usize) -> pear::Result<(), Self>238 pub fn backtrack(&mut self, n: usize) -> pear::Result<(), Self> { 239 let source_addr = self.source.as_ptr() as usize; 240 let current_addr = self.current.as_ptr() as usize; 241 if current_addr > n && (current_addr - n) >= source_addr { 242 let forward = (current_addr - n) - source_addr; 243 self.current = &self.source[forward..]; 244 Ok(()) 245 } else { 246 let diag = format!("({}, {:x} in {:x})", n, current_addr, source_addr); 247 Err(pear_error!([backtrack; self] "internal error: {}", diag)) 248 } 249 } 250 len(&self) -> usize251 pub fn len(&self) -> usize { 252 self.source.len() 253 } 254 } 255 256 macro_rules! impl_indexed_input { 257 ($T:ty, token = $token:ty) => ( 258 impl<'a> From<&'a $T> for IndexedInput<'a, $T> { 259 #[inline(always)] 260 fn from(source: &'a $T) -> Self { 261 IndexedInput { source: source, current: source } 262 } 263 } 264 265 impl<'a> Input for IndexedInput<'a, $T> { 266 type Token = $token; 267 type InSlice = &'a $T; 268 type Slice = Indexed<'static, $T>; 269 type Many = Indexed<'static, $T>; 270 type Context = Context; 271 272 #[inline(always)] 273 fn peek(&mut self) -> Option<Self::Token> { 274 self.current.peek() 275 } 276 277 #[inline(always)] 278 fn peek_slice(&mut self, slice: Self::InSlice) -> Option<Self::Slice> { 279 self.current.peek_slice(slice) 280 .map(|slice| unsafe { 281 Indexed::unchecked_from(slice, self.source) 282 }) 283 } 284 285 #[inline(always)] 286 fn skip_many<F>(&mut self, cond: F) -> usize 287 where F: FnMut(Self::Token) -> bool 288 { 289 self.current.skip_many(cond) 290 } 291 292 #[inline(always)] 293 fn take_many<F>(&mut self, cond: F) -> Self::Many 294 where F: FnMut(Self::Token) -> bool 295 { 296 let many = self.current.take_many(cond); 297 unsafe { Indexed::unchecked_from(many, self.source) } 298 } 299 300 #[inline(always)] 301 fn advance(&mut self, count: usize) { 302 self.current.advance(count) 303 } 304 305 #[inline(always)] 306 fn is_empty(&mut self) -> bool { 307 self.current.is_empty() 308 } 309 310 fn context(&mut self) -> Option<Context> { 311 let offset = self.source.len() - self.current.len(); 312 let bytes: &[u8] = self.current.as_ref(); 313 let string = String::from_utf8(bytes.into()).ok()?; 314 Some(Context { offset, string }) 315 } 316 } 317 ) 318 } 319 320 #[derive(Debug, PartialEq, Eq, Clone, Hash)] 321 pub struct Context { 322 pub offset: usize, 323 pub string: String 324 } 325 326 impl std::fmt::Display for Context { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result327 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 328 const LIMIT: usize = 7; 329 write!(f, "[{}:]", self.offset)?; 330 331 if self.string.len() > LIMIT { 332 write!(f, " {}..", &self.string[..LIMIT]) 333 } else if !self.string.is_empty() { 334 write!(f, " {}", &self.string) 335 } else { 336 Ok(()) 337 } 338 } 339 } 340 341 impl_indexed_input!([u8], token = u8); 342 impl_indexed_input!(str, token = char); 343