1 //! Traits input types have to implement to work with nom combinators
2 //!
3 use crate::internal::{Err, IResult, Needed};
4 use crate::error::{ParseError, ErrorKind};
5 use crate::lib::std::ops::{Range, RangeFrom, RangeFull, RangeTo};
6 use crate::lib::std::iter::Enumerate;
7 use crate::lib::std::slice::Iter;
8 use crate::lib::std::iter::Map;
9 use crate::lib::std::str::Chars;
10 use crate::lib::std::str::CharIndices;
11 use crate::lib::std::str::FromStr;
12 use crate::lib::std::str::from_utf8;
13 use memchr;
14
15 #[cfg(feature = "alloc")]
16 use crate::lib::std::string::String;
17 #[cfg(feature = "alloc")]
18 use crate::lib::std::vec::Vec;
19
/// Reports how long an input is.
pub trait InputLength {
  /// Returns the length of the input, in the unit that is natural for the
  /// implementing type (see the individual implementations).
  fn input_len(&self) -> usize;
}

impl<'a, T> InputLength for &'a [T] {
  /// Number of elements in the slice.
  #[inline]
  fn input_len(&self) -> usize {
    <[T]>::len(self)
  }
}

impl<'a> InputLength for &'a str {
  /// Length of the string in bytes (not in characters).
  #[inline]
  fn input_len(&self) -> usize {
    str::len(self)
  }
}

impl<'a> InputLength for (&'a [u8], usize) {
  /// Bit length of the byte slice (8 bits per byte) minus the `usize`
  /// stored next to it.
  #[inline]
  fn input_len(&self) -> usize {
    let (bytes, offset) = *self;
    bytes.len() * 8 - offset
  }
}
49
/// Computes how far into one input another input starts.
pub trait Offset {
  /// Distance in bytes from the first byte of `self` to the first byte of
  /// `second`, obtained by subtracting the two start addresses.
  ///
  /// `second` is expected to start at or after `self`; otherwise the
  /// unsigned subtraction underflows.
  fn offset(&self, second: &Self) -> usize;
}

impl Offset for [u8] {
  fn offset(&self, second: &Self) -> usize {
    let (start, target) = (self.as_ptr() as usize, second.as_ptr() as usize);
    target - start
  }
}

impl<'a> Offset for &'a [u8] {
  fn offset(&self, second: &Self) -> usize {
    let (start, target) = (self.as_ptr() as usize, second.as_ptr() as usize);
    target - start
  }
}

impl Offset for str {
  fn offset(&self, second: &Self) -> usize {
    let (start, target) = (self.as_ptr() as usize, second.as_ptr() as usize);
    target - start
  }
}

impl<'a> Offset for &'a str {
  fn offset(&self, second: &Self) -> usize {
    let (start, target) = (self.as_ptr() as usize, second.as_ptr() as usize);
    target - start
  }
}
91
/// Conversion of an input into a plain byte slice.
pub trait AsBytes {
  /// Borrows `self` as a `&[u8]`.
  fn as_bytes(&self) -> &[u8];
}

impl<'a> AsBytes for &'a str {
  #[inline(always)]
  fn as_bytes(&self) -> &[u8] {
    // Forward to the implementation for `str`.
    AsBytes::as_bytes(*self)
  }
}

impl AsBytes for str {
  #[inline(always)]
  fn as_bytes(&self) -> &[u8] {
    <str as AsRef<[u8]>>::as_ref(self)
  }
}

impl<'a> AsBytes for &'a [u8] {
  #[inline(always)]
  fn as_bytes(&self) -> &[u8] {
    &self[..]
  }
}

impl AsBytes for [u8] {
  #[inline(always)]
  fn as_bytes(&self) -> &[u8] {
    &self[..]
  }
}

// Generates `AsBytes` for `[u8; N]` and `&[u8; N]`, once per listed length.
macro_rules! as_bytes_array_impls {
  ($($len:expr)+) => {
    $(
      impl<'a> AsBytes for &'a [u8; $len] {
        #[inline(always)]
        fn as_bytes(&self) -> &[u8] {
          &self[..]
        }
      }

      impl AsBytes for [u8; $len] {
        #[inline(always)]
        fn as_bytes(&self) -> &[u8] {
          &self[..]
        }
      }
    )+
  };
}

// Array lengths 0 through 32.
as_bytes_array_impls! {
  0 1 2 3 4 5 6 7 8 9
  10 11 12 13 14 15 16 17 18 19
  20 21 22 23 24 25 26 27 28 29
  30 31 32
}
152
/// Character-like behaviour for the item types used by basic token parsers.
///
/// All classification methods implemented in this module are ASCII-only,
/// for bytes as well as for `char`.
pub trait AsChar {
  /// Converts `self` into a `char`.
  fn as_char(self) -> char;

  /// Tests whether `self` is an ASCII letter (`A-Z` or `a-z`).
  ///
  /// Non-ASCII alphabetic characters such as `'é'` are rejected by the
  /// implementations provided here.
  fn is_alpha(self) -> bool;

  /// Tests whether `self` is an ASCII letter or a decimal digit.
  fn is_alphanum(self) -> bool;
  /// Tests whether `self` is a decimal digit (`0-9`).
  fn is_dec_digit(self) -> bool;
  /// Tests whether `self` is a hexadecimal digit (`0-9`, `A-F`, `a-f`).
  fn is_hex_digit(self) -> bool;
  /// Tests whether `self` is an octal digit (`0-7`).
  fn is_oct_digit(self) -> bool;
  /// Length of `self` in bytes (the UTF-8 length for `char`, 1 for a byte).
  fn len(self) -> usize;
}

impl AsChar for u8 {
  #[inline]
  fn as_char(self) -> char {
    char::from(self)
  }
  #[inline]
  fn is_alpha(self) -> bool {
    self.is_ascii_alphabetic()
  }
  #[inline]
  fn is_alphanum(self) -> bool {
    self.is_ascii_alphanumeric()
  }
  #[inline]
  fn is_dec_digit(self) -> bool {
    self.is_ascii_digit()
  }
  #[inline]
  fn is_hex_digit(self) -> bool {
    self.is_ascii_hexdigit()
  }
  #[inline]
  fn is_oct_digit(self) -> bool {
    match self {
      b'0'..=b'7' => true,
      _ => false,
    }
  }
  #[inline]
  fn len(self) -> usize {
    1
  }
}

// A borrowed byte behaves exactly like the byte it points to.
impl<'a> AsChar for &'a u8 {
  #[inline]
  fn as_char(self) -> char {
    AsChar::as_char(*self)
  }
  #[inline]
  fn is_alpha(self) -> bool {
    AsChar::is_alpha(*self)
  }
  #[inline]
  fn is_alphanum(self) -> bool {
    AsChar::is_alphanum(*self)
  }
  #[inline]
  fn is_dec_digit(self) -> bool {
    AsChar::is_dec_digit(*self)
  }
  #[inline]
  fn is_hex_digit(self) -> bool {
    AsChar::is_hex_digit(*self)
  }
  #[inline]
  fn is_oct_digit(self) -> bool {
    AsChar::is_oct_digit(*self)
  }
  #[inline]
  fn len(self) -> usize {
    1
  }
}

impl AsChar for char {
  #[inline]
  fn as_char(self) -> char {
    self
  }
  #[inline]
  fn is_alpha(self) -> bool {
    self.is_ascii_alphabetic()
  }
  #[inline]
  fn is_alphanum(self) -> bool {
    self.is_ascii_alphanumeric()
  }
  #[inline]
  fn is_dec_digit(self) -> bool {
    self.is_ascii_digit()
  }
  #[inline]
  fn is_hex_digit(self) -> bool {
    self.is_ascii_hexdigit()
  }
  #[inline]
  fn is_oct_digit(self) -> bool {
    self.is_digit(8)
  }
  #[inline]
  fn len(self) -> usize {
    self.len_utf8()
  }
}

// A borrowed `char` behaves exactly like the `char` it points to.
impl<'a> AsChar for &'a char {
  #[inline]
  fn as_char(self) -> char {
    *self
  }
  #[inline]
  fn is_alpha(self) -> bool {
    AsChar::is_alpha(*self)
  }
  #[inline]
  fn is_alphanum(self) -> bool {
    AsChar::is_alphanum(*self)
  }
  #[inline]
  fn is_dec_digit(self) -> bool {
    AsChar::is_dec_digit(*self)
  }
  #[inline]
  fn is_hex_digit(self) -> bool {
    AsChar::is_hex_digit(*self)
  }
  #[inline]
  fn is_oct_digit(self) -> bool {
    AsChar::is_oct_digit(*self)
  }
  #[inline]
  fn len(self) -> usize {
    AsChar::len(*self)
  }
}
299
/// Iteration over the elements of an input type.
pub trait InputIter {
  /// Element type the input is a sequence of.
  ///
  /// Example: `u8` for `&[u8]`, `char` for `&str`.
  type Item;
  /// Iterator over `(position, item)` pairs, where the position can be used
  /// to slice the input. For `&str` the position is the byte index of the
  /// character.
  type Iter: Iterator<Item = (usize, Self::Item)>;

  /// Iterator over the items alone.
  type IterElem: Iterator<Item = Self::Item>;

  /// Returns an iterator over the elements paired with their positions.
  fn iter_indices(&self) -> Self::Iter;
  /// Returns an iterator over the elements.
  fn iter_elements(&self) -> Self::IterElem;
  /// Returns the position of the first element accepted by `predicate`,
  /// or `None` when no element is accepted.
  fn position<P: Fn(Self::Item) -> bool>(&self, predicate: P) -> Option<usize>;
  /// Translates a number of elements into the offset found just after that
  /// many elements; `None` when the input holds fewer than `count` elements.
  fn slice_index(&self, count: usize) -> Option<usize>;
}

/// Slicing an input at a given offset.
pub trait InputTake: Sized {
  /// Returns the first `count` bytes of the input. Panics if `count` is
  /// larger than the input length.
  fn take(&self, count: usize) -> Self;
  /// Splits the input at byte offset `count` and returns
  /// `(remainder, taken prefix)`. Panics if `count` is larger than the
  /// input length.
  fn take_split(&self, count: usize) -> (Self, Self);
}

/// Copies a byte out of a reference. A named function is needed so that it
/// can be stored as the `fn(&u8) -> u8` in `IterElem` below.
fn star(r_u8: &u8) -> u8 {
  *r_u8
}

impl<'a> InputIter for &'a [u8] {
  type Item = u8;
  type Iter = Enumerate<Self::IterElem>;
  type IterElem = Map<Iter<'a, Self::Item>, fn(&u8) -> u8>;

  #[inline]
  fn iter_indices(&self) -> Self::Iter {
    Iterator::enumerate(self.iter_elements())
  }
  #[inline]
  fn iter_elements(&self) -> Self::IterElem {
    self.iter().map(star)
  }
  #[inline]
  fn position<P: Fn(Self::Item) -> bool>(&self, predicate: P) -> Option<usize> {
    self.iter().cloned().position(predicate)
  }
  #[inline]
  fn slice_index(&self, count: usize) -> Option<usize> {
    // Every element is one byte, so the count is the offset when it fits.
    if count <= self.len() {
      Some(count)
    } else {
      None
    }
  }
}

impl<'a> InputTake for &'a [u8] {
  #[inline]
  fn take(&self, count: usize) -> Self {
    &self[..count]
  }
  #[inline]
  fn take_split(&self, count: usize) -> (Self, Self) {
    (&self[count..], &self[..count])
  }
}

impl<'a> InputIter for &'a str {
  type Item = char;
  type Iter = CharIndices<'a>;
  type IterElem = Chars<'a>;
  #[inline]
  fn iter_indices(&self) -> Self::Iter {
    self.char_indices()
  }
  #[inline]
  fn iter_elements(&self) -> Self::IterElem {
    self.chars()
  }
  fn position<P: Fn(Self::Item) -> bool>(&self, predicate: P) -> Option<usize> {
    self
      .char_indices()
      .find(|&(_, c)| predicate(c))
      .map(|(byte_offset, _)| byte_offset)
  }
  #[inline]
  fn slice_index(&self, count: usize) -> Option<usize> {
    // Byte offsets at which each character starts, followed by the total
    // length (the offset reached after consuming every character).
    self
      .char_indices()
      .map(|(byte_offset, _)| byte_offset)
      .chain(Some(self.len()))
      .nth(count)
  }
}

impl<'a> InputTake for &'a str {
  #[inline]
  fn take(&self, count: usize) -> Self {
    self.split_at(count).0
  }

  // `count` is a byte index
  #[inline]
  fn take_split(&self, count: usize) -> (Self, Self) {
    let (taken, remainder) = self.split_at(count);
    (remainder, taken)
  }
}
431
432 /// Dummy trait used for default implementations (currently only used for `InputTakeAtPosition`).
433 ///
434 /// When implementing a custom input type, it is possible to use directly the
435 /// default implementation: if the input type implements `InputLength`, `InputIter`,
436 /// `InputTake` and `Clone`, you can implement `UnspecializedInput` and get
437 /// a default version of `InputTakeAtPosition`.
438 ///
439 /// For performance reasons, you might want to write a custom implementation of
440 /// `InputTakeAtPosition` (like the one for `&[u8]`).
441 pub trait UnspecializedInput {}
442
443 /// methods to take as much input as possible until the provided function returns true for the current element
444 ///
445 /// a large part of nom's basic parsers are built using this trait
446 pub trait InputTakeAtPosition: Sized {
447 /// the current input type is a sequence of that `Item` type.
448 ///
449 /// example: `u8` for `&[u8]` or `char` for &str`
450 type Item;
451
452 /// looks for the first element of the input type for which the condition returns true,
453 /// and returns the input up to this position
454 ///
455 /// *streaming version*: if no element is found matching the condition, this will return `Incomplete`
split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool456 fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
457 where
458 P: Fn(Self::Item) -> bool;
459
460 /// looks for the first element of the input type for which the condition returns true
461 /// and returns the input up to this position
462 ///
463 /// fails if the produced slice is empty
464 ///
465 /// *streaming version*: if no element is found matching the condition, this will return `Incomplete`
split_at_position1<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool466 fn split_at_position1<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E>
467 where
468 P: Fn(Self::Item) -> bool;
469
470 /// looks for the first element of the input type for which the condition returns true,
471 /// and returns the input up to this position
472 ///
473 /// *complete version*: if no element is found matching the condition, this will return the whole input
split_at_position_complete<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool474 fn split_at_position_complete<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
475 where
476 P: Fn(Self::Item) -> bool;
477
478 /// looks for the first element of the input type for which the condition returns true
479 /// and returns the input up to this position
480 ///
481 /// fails if the produced slice is empty
482 ///
483 /// *complete version*: if no element is found matching the condition, this will return the whole input
split_at_position1_complete<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool484 fn split_at_position1_complete<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E>
485 where
486 P: Fn(Self::Item) -> bool;
487 }
488
489 impl<T: InputLength + InputIter + InputTake + Clone + UnspecializedInput> InputTakeAtPosition for T {
490 type Item = <T as InputIter>::Item;
491
split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool,492 fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
493 where
494 P: Fn(Self::Item) -> bool,
495 {
496 match self.position(predicate) {
497 Some(n) => Ok(self.take_split(n)),
498 None => Err(Err::Incomplete(Needed::Size(1))),
499 }
500 }
501
split_at_position1<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool,502 fn split_at_position1<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E>
503 where
504 P: Fn(Self::Item) -> bool,
505 {
506 match self.position(predicate) {
507 Some(0) => Err(Err::Error(E::from_error_kind(self.clone(), e))),
508 Some(n) => Ok(self.take_split(n)),
509 None => Err(Err::Incomplete(Needed::Size(1))),
510 }
511 }
512
split_at_position_complete<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool513 fn split_at_position_complete<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
514 where P: Fn(Self::Item) -> bool {
515 match self.split_at_position(predicate) {
516 Err(Err::Incomplete(_)) => Ok(self.take_split(self.input_len())),
517 res => res,
518 }
519 }
520
split_at_position1_complete<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool521 fn split_at_position1_complete<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E>
522 where P: Fn(Self::Item) -> bool {
523 match self.split_at_position1(predicate, e) {
524 Err(Err::Incomplete(_)) => if self.input_len() == 0 {
525 Err(Err::Error(E::from_error_kind(self.clone(), e)))
526 } else {
527 Ok(self.take_split(self.input_len()))
528 }
529 res => res,
530 }
531 }
532 }
533
534 impl<'a> InputTakeAtPosition for &'a [u8] {
535 type Item = u8;
536
split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool,537 fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
538 where
539 P: Fn(Self::Item) -> bool,
540 {
541 match (0..self.len()).find(|b| predicate(self[*b])) {
542 Some(i) => Ok((&self[i..], &self[..i])),
543 None => Err(Err::Incomplete(Needed::Size(1))),
544 }
545 }
546
split_at_position1<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool,547 fn split_at_position1<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E>
548 where
549 P: Fn(Self::Item) -> bool,
550 {
551 match (0..self.len()).find(|b| predicate(self[*b])) {
552 Some(0) => Err(Err::Error(E::from_error_kind(self, e))),
553 Some(i) => Ok((&self[i..], &self[..i])),
554 None => Err(Err::Incomplete(Needed::Size(1))),
555 }
556 }
557
split_at_position_complete<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool558 fn split_at_position_complete<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
559 where P: Fn(Self::Item) -> bool {
560 match (0..self.len()).find(|b| predicate(self[*b])) {
561 Some(i) => Ok((&self[i..], &self[..i])),
562 None => Ok(self.take_split(self.input_len())),
563 }
564 }
565
split_at_position1_complete<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool566 fn split_at_position1_complete<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E>
567 where P: Fn(Self::Item) -> bool {
568 match (0..self.len()).find(|b| predicate(self[*b])) {
569 Some(0) => Err(Err::Error(E::from_error_kind(self, e))),
570 Some(i) => Ok((&self[i..], &self[..i])),
571 None => {
572 if self.len() == 0 {
573 Err(Err::Error(E::from_error_kind(self, e)))
574 } else {
575 Ok(self.take_split(self.input_len()))
576 }
577 },
578 }
579 }
580 }
581
582 impl<'a> InputTakeAtPosition for &'a str {
583 type Item = char;
584
split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool,585 fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
586 where
587 P: Fn(Self::Item) -> bool,
588 {
589 match self.find(predicate) {
590 Some(i) => Ok((&self[i..], &self[..i])),
591 None => Err(Err::Incomplete(Needed::Size(1))),
592 }
593 }
594
split_at_position1<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool,595 fn split_at_position1<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E>
596 where
597 P: Fn(Self::Item) -> bool,
598 {
599 match self.find(predicate) {
600 Some(0) => Err(Err::Error(E::from_error_kind(self, e))),
601 Some(i) => Ok((&self[i..], &self[..i])),
602 None => Err(Err::Incomplete(Needed::Size(1))),
603 }
604 }
605
split_at_position_complete<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool606 fn split_at_position_complete<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
607 where P: Fn(Self::Item) -> bool {
608 match self.find(predicate) {
609 Some(i) => Ok((&self[i..], &self[..i])),
610 None => Ok(self.take_split(self.input_len()))
611 }
612 }
613
split_at_position1_complete<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E> where P: Fn(Self::Item) -> bool614 fn split_at_position1_complete<P, E: ParseError<Self>>(&self, predicate: P, e: ErrorKind) -> IResult<Self, Self, E>
615 where P: Fn(Self::Item) -> bool {
616 match self.find(predicate) {
617 Some(0) => Err(Err::Error(E::from_error_kind(self, e))),
618 Some(i) => Ok((&self[i..], &self[..i])),
619 None => {
620 if self.len() == 0 {
621 Err(Err::Error(E::from_error_kind(self, e)))
622 } else {
623 Ok(self.take_split(self.input_len()))
624 }
625 },
626 }
627 }
628 }
629
/// Indicates whether a comparison was successful, an error, or
/// if more data was needed.
#[derive(Debug, PartialEq)]
pub enum CompareResult {
  /// The input starts with the expected value.
  Ok,
  /// The input ended before the expected value did; more data is needed
  /// to decide.
  Incomplete,
  /// The input differs from the expected value.
  Error,
}

/// Prefix comparison between an input and an expected value.
pub trait Compare<T> {
  /// Checks whether `self` starts with `t`.
  fn compare(&self, t: T) -> CompareResult;
  /// Checks whether `self` starts with `t`, ignoring case.
  ///
  /// For byte slices only ASCII letters are case-folded. For `&str` each
  /// pair of characters is compared through `char::to_lowercase`; no
  /// locale-specific rules are applied.
  fn compare_no_case(&self, t: T) -> CompareResult;
}

impl<'a, 'b> Compare<&'b [u8]> for &'a [u8] {
  #[inline(always)]
  fn compare(&self, t: &'b [u8]) -> CompareResult {
    // Only the part both slices have in common can be compared.
    let shared = self.len().min(t.len());

    if self[..shared] != t[..shared] {
      CompareResult::Error
    } else if shared < t.len() {
      CompareResult::Incomplete
    } else {
      CompareResult::Ok
    }
  }

  #[inline(always)]
  fn compare_no_case(&self, t: &'b [u8]) -> CompareResult {
    let shared = self.len().min(t.len());

    if !self[..shared].eq_ignore_ascii_case(&t[..shared]) {
      CompareResult::Error
    } else if shared < t.len() {
      CompareResult::Incomplete
    } else {
      CompareResult::Ok
    }
  }
}

impl<'a, 'b> Compare<&'b str> for &'a [u8] {
  #[inline(always)]
  fn compare(&self, t: &'b str) -> CompareResult {
    self.compare(t.as_bytes())
  }
  #[inline(always)]
  fn compare_no_case(&self, t: &'b str) -> CompareResult {
    self.compare_no_case(t.as_bytes())
  }
}

impl<'a, 'b> Compare<&'b str> for &'a str {
  #[inline(always)]
  fn compare(&self, t: &'b str) -> CompareResult {
    if self.chars().zip(t.chars()).any(|(a, b)| a != b) {
      CompareResult::Error
    } else if self.len() >= t.len() {
      // All shared characters are identical, so byte lengths are comparable.
      CompareResult::Ok
    } else {
      CompareResult::Incomplete
    }
  }

  //FIXME: this version is too simple and does not use the current locale
  #[inline(always)]
  fn compare_no_case(&self, t: &'b str) -> CompareResult {
    let mut own = self.chars();

    for expected in t.chars() {
      match own.next() {
        // `self` ran out of characters while everything so far matched.
        // Characters are counted rather than bytes, because two characters
        // that are equal ignoring case may differ in UTF-8 length.
        None => return CompareResult::Incomplete,
        Some(actual) => {
          // Compare the complete lowercase expansions: a character may
          // lowercase to several characters, and a prefix match of the
          // expansions is not a match.
          if actual.to_lowercase().ne(expected.to_lowercase()) {
            return CompareResult::Error;
          }
        }
      }
    }

    CompareResult::Ok
  }
}
759
760 /// look for a token in self
761 pub trait FindToken<T> {
762 /// returns true if self contains the token
find_token(&self, token: T) -> bool763 fn find_token(&self, token: T) -> bool;
764 }
765
766 impl<'a> FindToken<u8> for &'a [u8] {
find_token(&self, token: u8) -> bool767 fn find_token(&self, token: u8) -> bool {
768 memchr::memchr(token, self).is_some()
769 }
770 }
771
772 impl<'a> FindToken<u8> for &'a str {
find_token(&self, token: u8) -> bool773 fn find_token(&self, token: u8) -> bool {
774 self.as_bytes().find_token(token)
775 }
776 }
777
778 impl<'a, 'b> FindToken<&'a u8> for &'b [u8] {
find_token(&self, token: &u8) -> bool779 fn find_token(&self, token: &u8) -> bool {
780 memchr::memchr(*token, self).is_some()
781 }
782 }
783
784 impl<'a, 'b> FindToken<&'a u8> for &'b str {
find_token(&self, token: &u8) -> bool785 fn find_token(&self, token: &u8) -> bool {
786 self.as_bytes().find_token(token)
787 }
788 }
789
790 impl<'a> FindToken<char> for &'a [u8] {
find_token(&self, token: char) -> bool791 fn find_token(&self, token: char) -> bool {
792 for i in self.iter() {
793 if token as u8 == *i {
794 return true;
795 }
796 }
797 false
798 }
799 }
800
801 impl<'a> FindToken<char> for &'a str {
find_token(&self, token: char) -> bool802 fn find_token(&self, token: char) -> bool {
803 for i in self.chars() {
804 if token == i {
805 return true;
806 }
807 }
808 false
809 }
810 }
811
812 /// look for a substring in self
813 pub trait FindSubstring<T> {
814 /// returns the byte position of the substring if it is found
find_substring(&self, substr: T) -> Option<usize>815 fn find_substring(&self, substr: T) -> Option<usize>;
816 }
817
818 impl<'a, 'b> FindSubstring<&'b [u8]> for &'a [u8] {
find_substring(&self, substr: &'b [u8]) -> Option<usize>819 fn find_substring(&self, substr: &'b [u8]) -> Option<usize> {
820 let substr_len = substr.len();
821
822 if substr_len == 0 {
823 // an empty substring is found at position 0
824 // This matches the behavior of str.find("").
825 Some(0)
826 } else if substr_len == 1 {
827 memchr::memchr(substr[0], self)
828 } else if substr_len > self.len() {
829 None
830 } else {
831 let max = self.len() - substr_len;
832 let mut offset = 0;
833 let mut haystack = &self[..];
834
835 while let Some(position) = memchr::memchr(substr[0], haystack) {
836 offset += position;
837
838 if offset > max {
839 return None;
840 }
841
842 if &haystack[position..position + substr_len] == substr {
843 return Some(offset);
844 }
845
846 haystack = &haystack[position + 1..];
847 offset += 1;
848 }
849
850 None
851 }
852 }
853 }
854
855 impl<'a, 'b> FindSubstring<&'b str> for &'a [u8] {
find_substring(&self, substr: &'b str) -> Option<usize>856 fn find_substring(&self, substr: &'b str) -> Option<usize> {
857 self.find_substring(AsBytes::as_bytes(substr))
858 }
859 }
860
861 impl<'a, 'b> FindSubstring<&'b str> for &'a str {
862 //returns byte index
find_substring(&self, substr: &'b str) -> Option<usize>863 fn find_substring(&self, substr: &'b str) -> Option<usize> {
864 self.find(substr)
865 }
866 }
867
/// Bridge to `str::parse` for input types.
pub trait ParseTo<R> {
  /// Returns `Some(value)` if the input parses into an `R`, `None`
  /// otherwise. Byte slices are first decoded as UTF-8 and yield `None`
  /// when that decoding fails.
  fn parse_to(&self) -> Option<R>;
}

impl<'a, R: FromStr> ParseTo<R> for &'a [u8] {
  fn parse_to(&self) -> Option<R> {
    match from_utf8(self) {
      Ok(text) => text.parse().ok(),
      Err(_) => None,
    }
  }
}

impl<'a, R: FromStr> ParseTo<R> for &'a str {
  fn parse_to(&self) -> Option<R> {
    R::from_str(self).ok()
  }
}
886
/// Range-based slicing of an input.
///
/// The trait is loosely modelled on `Index`, but returns `Self`, so an
/// implementor is free to produce something other than a `&[T]` or `&str`.
pub trait Slice<R> {
  /// Returns the part of `self` selected by `range`.
  fn slice(&self, range: R) -> Self;
}

// Expands to the `slice` method for one range type, delegating to indexing.
macro_rules! impl_fn_slice {
  ( $range:ty ) => {
    fn slice(&self, range: $range) -> Self {
      &self[range]
    }
  };
}

// Implements `Slice<$range>` either for `&[T]` (first arm, generic over the
// element type named between brackets) or for a reference to a concrete
// type such as `str` (second arm).
macro_rules! slice_range_impl {
  ( [ $elem:ident ], $range:ty ) => {
    impl<'a, $elem> Slice<$range> for &'a [$elem] {
      impl_fn_slice!( $range );
    }
  };
  ( $target:ty, $range:ty ) => {
    impl<'a> Slice<$range> for &'a $target {
      impl_fn_slice!( $range );
    }
  };
}

// Implements `Slice` for the four supported range types:
// `a..b`, `..b`, `a..` and `..`.
macro_rules! slice_ranges_impl {
  ( [ $elem:ident ] ) => {
    slice_range_impl! { [$elem], Range<usize> }
    slice_range_impl! { [$elem], RangeTo<usize> }
    slice_range_impl! { [$elem], RangeFrom<usize> }
    slice_range_impl! { [$elem], RangeFull }
  };
  ( $target:ty ) => {
    slice_range_impl! { $target, Range<usize> }
    slice_range_impl! { $target, RangeTo<usize> }
    slice_range_impl! { $target, RangeFrom<usize> }
    slice_range_impl! { $target, RangeFull }
  };
}

slice_ranges_impl! { str }
slice_ranges_impl! { [T] }
935
936 macro_rules! array_impls {
937 ($($N:expr)+) => {
938 $(
939 impl InputLength for [u8; $N] {
940 #[inline]
941 fn input_len(&self) -> usize {
942 self.len()
943 }
944 }
945
946 impl<'a> InputLength for &'a [u8; $N] {
947 #[inline]
948 fn input_len(&self) -> usize {
949 self.len()
950 }
951 }
952
953 impl<'a> Compare<[u8; $N]> for &'a [u8] {
954 #[inline(always)]
955 fn compare(&self, t: [u8; $N]) -> CompareResult {
956 self.compare(&t[..])
957 }
958
959 #[inline(always)]
960 fn compare_no_case(&self, t: [u8;$N]) -> CompareResult {
961 self.compare_no_case(&t[..])
962 }
963 }
964
965 impl<'a,'b> Compare<&'b [u8; $N]> for &'a [u8] {
966 #[inline(always)]
967 fn compare(&self, t: &'b [u8; $N]) -> CompareResult {
968 self.compare(&t[..])
969 }
970
971 #[inline(always)]
972 fn compare_no_case(&self, t: &'b [u8;$N]) -> CompareResult {
973 self.compare_no_case(&t[..])
974 }
975 }
976
977 impl FindToken<u8> for [u8; $N] {
978 fn find_token(&self, token: u8) -> bool {
979 memchr::memchr(token, &self[..]).is_some()
980 }
981 }
982
983 impl<'a> FindToken<&'a u8> for [u8; $N] {
984 fn find_token(&self, token: &u8) -> bool {
985 memchr::memchr(*token, &self[..]).is_some()
986 }
987 }
988 )+
989 };
990 }
991
992 array_impls! {
993 0 1 2 3 4 5 6 7 8 9
994 10 11 12 13 14 15 16 17 18 19
995 20 21 22 23 24 25 26 27 28 29
996 30 31 32
997 }
998
/// Abstracts an input that can be appended to an accumulator implementing
/// `Extend`.
///
/// Used to build modified input slices in `escaped_transform`.
pub trait ExtendInto {
    /// The element type the current input type is a sequence of.
    ///
    /// Example: `u8` for `&[u8]` or `char` for `&str`.
    type Item;

    /// The accumulator type that will be produced.
    type Extender: Extend<Self::Item>;

    /// Creates a new accumulator of the correct type.
    fn new_builder(&self) -> Self::Extender;

    /// Accumulates the input into the accumulator `acc`.
    fn extend_into(&self, acc: &mut Self::Extender);
}
1016
/// Byte slices accumulate into a `Vec<u8>`.
#[cfg(feature = "alloc")]
impl ExtendInto for [u8] {
    type Item = u8;
    type Extender = Vec<u8>;

    /// Returns a new, empty `Vec<u8>`.
    #[inline]
    fn new_builder(&self) -> Vec<u8> {
        Vec::new()
    }

    /// Appends a copy of every byte of `self` to `acc`.
    #[inline]
    fn extend_into(&self, acc: &mut Vec<u8>) {
        acc.extend_from_slice(self);
    }
}
1031
/// References to byte slices accumulate into a `Vec<u8>`.
#[cfg(feature = "alloc")]
impl ExtendInto for &[u8] {
    type Item = u8;
    type Extender = Vec<u8>;

    /// Returns a new, empty `Vec<u8>`.
    #[inline]
    fn new_builder(&self) -> Vec<u8> {
        Vec::new()
    }

    /// Appends a copy of every byte of the referenced slice to `acc`.
    #[inline]
    fn extend_into(&self, acc: &mut Vec<u8>) {
        acc.extend_from_slice(self);
    }
}
1046
1047
/// String slices accumulate into a `String`.
#[cfg(feature = "alloc")]
impl ExtendInto for str {
    type Item = char;
    type Extender = String;

    /// Returns a new, empty `String`.
    #[inline]
    fn new_builder(&self) -> String {
        String::new()
    }

    /// Appends the whole of `self` to the end of `acc`.
    #[inline]
    fn extend_into(&self, acc: &mut String) {
        acc.push_str(self);
    }
}
1062
/// References to string slices accumulate into a `String`.
#[cfg(feature = "alloc")]
impl ExtendInto for &str {
    type Item = char;
    type Extender = String;

    /// Returns a new, empty `String`.
    #[inline]
    fn new_builder(&self) -> String {
        String::new()
    }

    /// Appends the whole referenced string slice to the end of `acc`.
    #[inline]
    fn extend_into(&self, acc: &mut String) {
        acc.push_str(self);
    }
}
1077
/// A single `char` accumulates into a `String`.
#[cfg(feature = "alloc")]
impl ExtendInto for char {
    type Item = char;
    type Extender = String;

    /// Returns a new, empty `String`.
    #[inline]
    fn new_builder(&self) -> String {
        String::new()
    }

    /// Appends this character to the end of `acc`.
    #[inline]
    fn extend_into(&self, acc: &mut String) {
        acc.push(*self);
    }
}
1092
/// Helper trait to convert numbers to `usize`.
///
/// By default, `usize` implements `From<u8>` and `From<u16>` but not
/// `From<u32>` and `From<u64>`, because those conversions would be invalid
/// on some platforms. This trait implements the conversion on platforms
/// with 32-bit and 64-bit pointers.
pub trait ToUsize {
    /// Converts `self` to `usize`.
    fn to_usize(&self) -> usize;
}
1103
1104 impl ToUsize for u8 {
1105 #[inline]
to_usize(&self) -> usize1106 fn to_usize(&self) -> usize {
1107 *self as usize
1108 }
1109 }
1110
1111 impl ToUsize for u16 {
1112 #[inline]
to_usize(&self) -> usize1113 fn to_usize(&self) -> usize {
1114 *self as usize
1115 }
1116 }
1117
1118 impl ToUsize for usize {
1119 #[inline]
to_usize(&self) -> usize1120 fn to_usize(&self) -> usize {
1121 *self
1122 }
1123 }
1124
1125 #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
1126 impl ToUsize for u32 {
1127 #[inline]
to_usize(&self) -> usize1128 fn to_usize(&self) -> usize {
1129 *self as usize
1130 }
1131 }
1132
1133 #[cfg(target_pointer_width = "64")]
1134 impl ToUsize for u64 {
1135 #[inline]
to_usize(&self) -> usize1136 fn to_usize(&self) -> usize {
1137 *self as usize
1138 }
1139 }
1140
/// Equivalent of a `From` implementation, used to avoid the orphan rules in
/// bits parsers.
pub trait ErrorConvert<E> {
    /// Consumes `self` and transforms it into the error type `E`.
    fn convert(self) -> E;
}
1146
1147 impl<I> ErrorConvert<(I, ErrorKind)> for ((I, usize), ErrorKind) {
convert(self) -> (I, ErrorKind)1148 fn convert(self) -> (I, ErrorKind) {
1149 ((self.0).0, self.1)
1150 }
1151 }
1152
1153 impl<I> ErrorConvert<((I, usize), ErrorKind)> for (I, ErrorKind) {
convert(self) -> ((I, usize), ErrorKind)1154 fn convert(self) -> ((I, usize), ErrorKind) {
1155 ((self.0, 0), self.1)
1156 }
1157 }
1158
#[cfg(test)]
mod tests {
    use super::*;

    /// `offset` on byte slices is the distance from the start of the
    /// receiver to the start of the argument.
    #[test]
    fn test_offset_u8() {
        let s = b"abcd123";
        let a = &s[..];
        let b = &a[2..];
        let c = &a[..4];
        let d = &a[3..5];
        assert_eq!(a.offset(b), 2);
        assert_eq!(a.offset(c), 0);
        assert_eq!(a.offset(d), 3);
        // An empty tail starts at the end of the input.
        assert_eq!(a.offset(&a[a.len()..]), a.len());
    }

    /// `offset` on string slices is expected in bytes, not characters: the
    /// input mixes one-byte and multi-byte characters.
    #[test]
    fn test_offset_str() {
        let s = "abcřèÂßÇd123";
        let a = &s[..];
        let b = &a[7..];
        let c = &a[..5];
        let d = &a[5..9];
        assert_eq!(a.offset(b), 7);
        assert_eq!(a.offset(c), 0);
        assert_eq!(a.offset(d), 5);
    }
}
1187