//! Traits that input types have to implement to work with nom combinators
//!
3 use internal::{Err, IResult, Needed};
4 use lib::std::ops::{Range, RangeFrom, RangeFull, RangeTo};
5 use lib::std::iter::Enumerate;
6 use lib::std::slice::Iter;
7 use lib::std::iter::Map;
8
9 use lib::std::str::Chars;
10 use lib::std::str::CharIndices;
11 use lib::std::str::FromStr;
12 use lib::std::str::from_utf8;
13 #[cfg(feature = "alloc")]
14 use lib::std::string::String;
15 #[cfg(feature = "alloc")]
16 use lib::std::vec::Vec;
17
18 use memchr;
19
20 #[cfg(feature = "verbose-errors")]
21 use verbose_errors::Context;
22 #[cfg(not(feature = "verbose-errors"))]
23 use simple_errors::Context;
24
25 use util::ErrorKind;
26
/// Abstracts over input types whose length can be measured.
pub trait InputLength {
  /// Returns the length of the input.
  ///
  /// The unit depends on the implementing type: the implementations in this
  /// file count elements for slices, bytes for `&str` and bits for the
  /// `(&[u8], usize)` tuple.
  // No `#[inline]` here: the attribute has no effect on a method without a body.
  fn input_len(&self) -> usize;
}
34
35 impl<'a, T> InputLength for &'a [T] {
36 #[inline]
input_len(&self) -> usize37 fn input_len(&self) -> usize {
38 self.len()
39 }
40 }
41
42 impl<'a> InputLength for &'a str {
43 #[inline]
input_len(&self) -> usize44 fn input_len(&self) -> usize {
45 self.len()
46 }
47 }
48
49 impl<'a> InputLength for (&'a [u8], usize) {
50 #[inline]
input_len(&self) -> usize51 fn input_len(&self) -> usize {
52 //println!("bit input length for ({:?}, {}):", self.0, self.1);
53 //println!("-> {}", self.0.len() * 8 - self.1);
54 self.0.len() * 8 - self.1
55 }
56 }
57
/// Computes the distance between the start of two inputs.
///
/// The implementations in this file subtract the two start addresses, so the
/// argument is expected to start at or after `self` (typically a sub-slice of
/// it); otherwise the subtraction underflows.
pub trait Offset {
  /// Offset between the first byte of `self` and the first byte of `second`.
  fn offset(&self, second: &Self) -> usize;
}
63
64 impl Offset for [u8] {
offset(&self, second: &Self) -> usize65 fn offset(&self, second: &Self) -> usize {
66 let fst = self.as_ptr();
67 let snd = second.as_ptr();
68
69 snd as usize - fst as usize
70 }
71 }
72
73 impl<'a> Offset for &'a [u8] {
offset(&self, second: &Self) -> usize74 fn offset(&self, second: &Self) -> usize {
75 let fst = self.as_ptr();
76 let snd = second.as_ptr();
77
78 snd as usize - fst as usize
79 }
80 }
81
82 impl Offset for str {
offset(&self, second: &Self) -> usize83 fn offset(&self, second: &Self) -> usize {
84 let fst = self.as_ptr();
85 let snd = second.as_ptr();
86
87 snd as usize - fst as usize
88 }
89 }
90
91 impl<'a> Offset for &'a str {
offset(&self, second: &Self) -> usize92 fn offset(&self, second: &Self) -> usize {
93 let fst = self.as_ptr();
94 let snd = second.as_ptr();
95
96 snd as usize - fst as usize
97 }
98 }
99
/// Gives access to the input as a byte slice.
pub trait AsBytes {
  /// Borrows `self` as `&[u8]`.
  fn as_bytes(&self) -> &[u8];
}
104
105 impl<'a> AsBytes for &'a str {
106 #[inline(always)]
as_bytes(&self) -> &[u8]107 fn as_bytes(&self) -> &[u8] {
108 <str as AsBytes>::as_bytes(self)
109 }
110 }
111
112 impl AsBytes for str {
113 #[inline(always)]
as_bytes(&self) -> &[u8]114 fn as_bytes(&self) -> &[u8] {
115 self.as_ref()
116 }
117 }
118
119 impl<'a> AsBytes for &'a [u8] {
120 #[inline(always)]
as_bytes(&self) -> &[u8]121 fn as_bytes(&self) -> &[u8] {
122 *self
123 }
124 }
125
126 impl AsBytes for [u8] {
127 #[inline(always)]
as_bytes(&self) -> &[u8]128 fn as_bytes(&self) -> &[u8] {
129 self
130 }
131 }
132
133 macro_rules! as_bytes_array_impls {
134 ($($N:expr)+) => {
135 $(
136 impl<'a> AsBytes for &'a [u8; $N] {
137 #[inline(always)]
138 fn as_bytes(&self) -> &[u8] {
139 *self
140 }
141 }
142
143 impl AsBytes for [u8; $N] {
144 #[inline(always)]
145 fn as_bytes(&self) -> &[u8] {
146 self
147 }
148 }
149 )+
150 };
151 }
152
153 as_bytes_array_impls! {
154 0 1 2 3 4 5 6 7 8 9
155 10 11 12 13 14 15 16 17 18 19
156 20 21 22 23 24 25 26 27 28 29
157 30 31 32
158 }
159
/// Transforms common types to a char for basic token parsing.
// None of the methods carry `#[inline]`: the attribute has no effect on
// a trait method without a body, so it belongs on the implementations.
pub trait AsChar {
  /// Makes a char from self.
  fn as_char(self) -> char;

  /// Tests that self is an alphabetic character.
  ///
  /// Warning: the `char` implementation relies on `char::is_alphabetic` and
  /// therefore recognizes alphabetic characters outside of the 52 ASCII
  /// letters; the byte implementations only accept ASCII letters.
  fn is_alpha(self) -> bool;

  /// Tests that self is an alphabetic character or a decimal digit.
  fn is_alphanum(self) -> bool;

  /// Tests that self is a decimal digit.
  fn is_dec_digit(self) -> bool;

  /// Tests that self is a hexadecimal digit.
  fn is_hex_digit(self) -> bool;

  /// Tests that self is an octal digit.
  fn is_oct_digit(self) -> bool;

  /// Gets the length in bytes of self.
  fn len(self) -> usize;
}
190
191 impl AsChar for u8 {
192 #[inline]
as_char(self) -> char193 fn as_char(self) -> char {
194 self as char
195 }
196 #[inline]
is_alpha(self) -> bool197 fn is_alpha(self) -> bool {
198 (self >= 0x41 && self <= 0x5A) || (self >= 0x61 && self <= 0x7A)
199 }
200 #[inline]
is_alphanum(self) -> bool201 fn is_alphanum(self) -> bool {
202 self.is_alpha() || self.is_dec_digit()
203 }
204 #[inline]
is_dec_digit(self) -> bool205 fn is_dec_digit(self) -> bool {
206 self >= 0x30 && self <= 0x39
207 }
208 #[inline]
is_hex_digit(self) -> bool209 fn is_hex_digit(self) -> bool {
210 (self >= 0x30 && self <= 0x39) || (self >= 0x41 && self <= 0x46) || (self >= 0x61 && self <= 0x66)
211 }
212 #[inline]
is_oct_digit(self) -> bool213 fn is_oct_digit(self) -> bool {
214 self >= 0x30 && self <= 0x37
215 }
216 #[inline]
len(self) -> usize217 fn len(self) -> usize {
218 1
219 }
220 }
221 impl<'a> AsChar for &'a u8 {
222 #[inline]
as_char(self) -> char223 fn as_char(self) -> char {
224 *self as char
225 }
226 #[inline]
is_alpha(self) -> bool227 fn is_alpha(self) -> bool {
228 (*self >= 0x41 && *self <= 0x5A) || (*self >= 0x61 && *self <= 0x7A)
229 }
230 #[inline]
is_alphanum(self) -> bool231 fn is_alphanum(self) -> bool {
232 self.is_alpha() || self.is_dec_digit()
233 }
234 #[inline]
is_dec_digit(self) -> bool235 fn is_dec_digit(self) -> bool {
236 *self >= 0x30 && *self <= 0x39
237 }
238 #[inline]
is_hex_digit(self) -> bool239 fn is_hex_digit(self) -> bool {
240 (*self >= 0x30 && *self <= 0x39) || (*self >= 0x41 && *self <= 0x46) || (*self >= 0x61 && *self <= 0x66)
241 }
242 #[inline]
is_oct_digit(self) -> bool243 fn is_oct_digit(self) -> bool {
244 *self >= 0x30 && *self <= 0x37
245 }
246 #[inline]
len(self) -> usize247 fn len(self) -> usize {
248 1
249 }
250 }
251
252 impl AsChar for char {
253 #[inline]
as_char(self) -> char254 fn as_char(self) -> char {
255 self
256 }
257 #[cfg(feature = "alloc")]
258 #[inline]
is_alpha(self) -> bool259 fn is_alpha(self) -> bool {
260 self.is_alphabetic()
261 }
262 #[cfg(not(feature = "alloc"))]
263 #[inline]
is_alpha(self) -> bool264 fn is_alpha(self) -> bool {
265 unimplemented!(
266 "error[E0658]: use of unstable library feature 'core_char_ext': the stable interface is `impl char` in later crate (see issue #32110)"
267 )
268 }
269 #[inline]
is_alphanum(self) -> bool270 fn is_alphanum(self) -> bool {
271 self.is_alpha() || self.is_dec_digit()
272 }
273 #[inline]
is_dec_digit(self) -> bool274 fn is_dec_digit(self) -> bool {
275 self.is_digit(10)
276 }
277 #[inline]
is_hex_digit(self) -> bool278 fn is_hex_digit(self) -> bool {
279 self.is_digit(16)
280 }
281 #[inline]
is_oct_digit(self) -> bool282 fn is_oct_digit(self) -> bool {
283 self.is_digit(8)
284 }
285 #[inline]
len(self) -> usize286 fn len(self) -> usize {
287 self.len_utf8()
288 }
289 }
290
291 impl<'a> AsChar for &'a char {
292 #[inline]
as_char(self) -> char293 fn as_char(self) -> char {
294 *self
295 }
296 #[inline]
is_alpha(self) -> bool297 fn is_alpha(self) -> bool {
298 <char as AsChar>::is_alpha(*self)
299 }
300 #[inline]
is_alphanum(self) -> bool301 fn is_alphanum(self) -> bool {
302 self.is_alpha() || self.is_dec_digit()
303 }
304 #[inline]
is_dec_digit(self) -> bool305 fn is_dec_digit(self) -> bool {
306 self.is_digit(10)
307 }
308 #[inline]
is_hex_digit(self) -> bool309 fn is_hex_digit(self) -> bool {
310 self.is_digit(16)
311 }
312 #[inline]
is_oct_digit(self) -> bool313 fn is_oct_digit(self) -> bool {
314 self.is_digit(8)
315 }
316 #[inline]
len(self) -> usize317 fn len(self) -> usize {
318 self.len_utf8()
319 }
320 }
321
/// Abstracts common iteration operations on the input type.
///
/// `Item` and `RawItem` are kept separate because `&[T]` iterates on
/// references.
pub trait InputIter {
  /// Element type produced by the iterators.
  type Item;
  /// Element type handed to the predicate of `position`.
  type RawItem;
  /// Iterator over `(offset, element)` pairs.
  type Iter: Iterator<Item = (usize, Self::Item)>;
  /// Iterator over the elements alone.
  type IterElem: Iterator<Item = Self::Item>;

  /// Returns an iterator over the elements and their byte offsets.
  fn iter_indices(&self) -> Self::Iter;

  /// Returns an iterator over the elements.
  fn iter_elements(&self) -> Self::IterElem;

  /// Finds the byte position of the first element accepted by `predicate`.
  fn position<P>(&self, predicate: P) -> Option<usize>
  where
    P: Fn(Self::RawItem) -> bool;

  /// Gets the byte offset of the element at position `count` in the stream.
  fn slice_index(&self, count: usize) -> Option<usize>;
}
343
/// Abstracts slicing operations.
pub trait InputTake: Sized {
  /// Returns a slice of `count` bytes. Panics if `count` > length.
  fn take(&self, count: usize) -> Self;

  /// Splits the stream at the `count` byte offset. Panics if `count` > length.
  ///
  /// The implementations in this file return the part after the offset first
  /// and the part before it second.
  fn take_split(&self, count: usize) -> (Self, Self);
}
351
/// Copies the byte out of a reference.
///
/// A named function so it can be used where a `fn(&u8) -> u8` pointer type
/// has to be spelled out.
fn star(r_u8: &u8) -> u8 {
  let &byte = r_u8;
  byte
}
355
356 impl<'a> InputIter for &'a [u8] {
357 type Item = u8;
358 type RawItem = u8;
359 type Iter = Enumerate<Self::IterElem>;
360 type IterElem = Map<Iter<'a, Self::Item>, fn(&u8) -> u8>;
361
362 #[inline]
iter_indices(&self) -> Self::Iter363 fn iter_indices(&self) -> Self::Iter {
364 self.iter_elements().enumerate()
365 }
366 #[inline]
iter_elements(&self) -> Self::IterElem367 fn iter_elements(&self) -> Self::IterElem {
368 self.iter().map(star)
369 }
370 #[inline]
position<P>(&self, predicate: P) -> Option<usize> where P: Fn(Self::Item) -> bool,371 fn position<P>(&self, predicate: P) -> Option<usize>
372 where
373 P: Fn(Self::Item) -> bool,
374 {
375 self.iter().position(|b| predicate(*b))
376 }
377 #[inline]
slice_index(&self, count: usize) -> Option<usize>378 fn slice_index(&self, count: usize) -> Option<usize> {
379 if self.len() >= count {
380 Some(count)
381 } else {
382 None
383 }
384 }
385 }
386
387 impl<'a> InputTake for &'a [u8] {
388 #[inline]
take(&self, count: usize) -> Self389 fn take(&self, count: usize) -> Self {
390 &self[0..count]
391 }
392 #[inline]
take_split(&self, count: usize) -> (Self, Self)393 fn take_split(&self, count: usize) -> (Self, Self) {
394 let (prefix, suffix) = self.split_at(count);
395 (suffix, prefix)
396 }
397 }
398
399 impl<'a> InputIter for &'a str {
400 type Item = char;
401 type RawItem = char;
402 type Iter = CharIndices<'a>;
403 type IterElem = Chars<'a>;
404 #[inline]
iter_indices(&self) -> Self::Iter405 fn iter_indices(&self) -> Self::Iter {
406 self.char_indices()
407 }
408 #[inline]
iter_elements(&self) -> Self::IterElem409 fn iter_elements(&self) -> Self::IterElem {
410 self.chars()
411 }
position<P>(&self, predicate: P) -> Option<usize> where P: Fn(Self::RawItem) -> bool,412 fn position<P>(&self, predicate: P) -> Option<usize>
413 where
414 P: Fn(Self::RawItem) -> bool,
415 {
416 for (o, c) in self.char_indices() {
417 if predicate(c) {
418 return Some(o);
419 }
420 }
421 None
422 }
423 #[inline]
slice_index(&self, count: usize) -> Option<usize>424 fn slice_index(&self, count: usize) -> Option<usize> {
425 let mut cnt = 0;
426 for (index, _) in self.char_indices() {
427 if cnt == count {
428 return Some(index);
429 }
430 cnt += 1;
431 }
432 if cnt == count {
433 return Some(self.len());
434 }
435 None
436 }
437 }
438
439 impl<'a> InputTake for &'a str {
440 #[inline]
take(&self, count: usize) -> Self441 fn take(&self, count: usize) -> Self {
442 &self[..count]
443 }
444
445 // return byte index
446 #[inline]
take_split(&self, count: usize) -> (Self, Self)447 fn take_split(&self, count: usize) -> (Self, Self) {
448 (&self[count..], &self[..count])
449 }
450 }
451
/// Marker trait that opts an input type into the default implementation of
/// `InputTakeAtPosition` (its only use at the moment).
///
/// A custom input type that implements `InputLength`, `InputIter`,
/// `InputTake`, `AtEof` and `Clone` can implement `UnspecializedInput` as well
/// and receive `InputTakeAtPosition` for free.
///
/// For performance reasons, a hand-written `InputTakeAtPosition` (like the
/// one for `&[u8]`) may be preferable.
pub trait UnspecializedInput {}
462
463 use types::CompleteStr;
464 use types::CompleteByteSlice;
465
466 /// methods to take as much input as possible until the provided function returns true for the current element
467 ///
468 /// a large part of nom's basic parsers are built using this trait
469 pub trait InputTakeAtPosition: Sized {
470 type Item;
471
split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool472 fn split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32>
473 where
474 P: Fn(Self::Item) -> bool;
split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool475 fn split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32>
476 where
477 P: Fn(Self::Item) -> bool;
478 }
479
480 impl<T: InputLength + InputIter + InputTake + AtEof + Clone + UnspecializedInput> InputTakeAtPosition for T {
481 type Item = <T as InputIter>::RawItem;
482
split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,483 fn split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32>
484 where
485 P: Fn(Self::Item) -> bool,
486 {
487 match self.position(predicate) {
488 Some(n) => Ok(self.take_split(n)),
489 None => {
490 if self.at_eof() {
491 Ok(self.take_split(self.input_len()))
492 } else {
493 Err(Err::Incomplete(Needed::Size(1)))
494 }
495 }
496 }
497 }
498
split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,499 fn split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32>
500 where
501 P: Fn(Self::Item) -> bool,
502 {
503 match self.position(predicate) {
504 Some(0) => Err(Err::Error(Context::Code(self.clone(), e))),
505 Some(n) => Ok(self.take_split(n)),
506 None => {
507 if self.at_eof() {
508 if self.input_len() == 0 {
509 Err(Err::Error(Context::Code(self.clone(), e)))
510 } else {
511 Ok(self.take_split(self.input_len()))
512 }
513 } else {
514 Err(Err::Incomplete(Needed::Size(1)))
515 }
516 }
517 }
518 }
519 }
520
521 impl<'a> InputTakeAtPosition for &'a [u8] {
522 type Item = u8;
523
split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,524 fn split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32>
525 where
526 P: Fn(Self::Item) -> bool,
527 {
528 match (0..self.len()).find(|b| predicate(self[*b])) {
529 Some(i) => Ok((&self[i..], &self[..i])),
530 None => Err(Err::Incomplete(Needed::Size(1))),
531 }
532 }
533
split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,534 fn split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32>
535 where
536 P: Fn(Self::Item) -> bool,
537 {
538 match (0..self.len()).find(|b| predicate(self[*b])) {
539 Some(0) => Err(Err::Error(Context::Code(self, e))),
540 Some(i) => Ok((&self[i..], &self[..i])),
541 None => Err(Err::Incomplete(Needed::Size(1))),
542 }
543 }
544 }
545
546 impl<'a> InputTakeAtPosition for CompleteByteSlice<'a> {
547 type Item = u8;
548
split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,549 fn split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32>
550 where
551 P: Fn(Self::Item) -> bool,
552 {
553 match (0..self.0.len()).find(|b| predicate(self.0[*b])) {
554 Some(i) => Ok((
555 CompleteByteSlice(&self.0[i..]),
556 CompleteByteSlice(&self.0[..i]),
557 )),
558 None => {
559 let (i, o) = self.0.take_split(self.0.len());
560 Ok((CompleteByteSlice(i), CompleteByteSlice(o)))
561 }
562 }
563 }
564
split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,565 fn split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32>
566 where
567 P: Fn(Self::Item) -> bool,
568 {
569 match (0..self.0.len()).find(|b| predicate(self.0[*b])) {
570 Some(0) => Err(Err::Error(Context::Code(CompleteByteSlice(self.0), e))),
571 Some(i) => Ok((
572 CompleteByteSlice(&self.0[i..]),
573 CompleteByteSlice(&self.0[..i]),
574 )),
575 None => {
576 if self.0.len() == 0 {
577 Err(Err::Error(Context::Code(CompleteByteSlice(self.0), e)))
578 } else {
579 Ok((
580 CompleteByteSlice(&self.0[self.0.len()..]),
581 CompleteByteSlice(self.0),
582 ))
583 }
584 }
585 }
586 }
587 }
588
589 impl<'a> InputTakeAtPosition for &'a str {
590 type Item = char;
591
split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,592 fn split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32>
593 where
594 P: Fn(Self::Item) -> bool,
595 {
596 match self.char_indices().find(|&(_, c)| predicate(c)) {
597 Some((i, _)) => Ok((&self[i..], &self[..i])),
598 None => Err(Err::Incomplete(Needed::Size(1))),
599 }
600 }
601
split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,602 fn split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32>
603 where
604 P: Fn(Self::Item) -> bool,
605 {
606 match self.char_indices().find(|&(_, c)| predicate(c)) {
607 Some((0, _)) => Err(Err::Error(Context::Code(self, e))),
608 Some((i, _)) => Ok((&self[i..], &self[..i])),
609 None => Err(Err::Incomplete(Needed::Size(1))),
610 }
611 }
612 }
613
614 impl<'a> InputTakeAtPosition for CompleteStr<'a> {
615 type Item = char;
616
split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,617 fn split_at_position<P>(&self, predicate: P) -> IResult<Self, Self, u32>
618 where
619 P: Fn(Self::Item) -> bool,
620 {
621 match self.0.char_indices().find(|&(_, c)| predicate(c)) {
622 Some((i, _)) => Ok((CompleteStr(&self.0[i..]), CompleteStr(&self.0[..i]))),
623 None => {
624 let (i, o) = self.0.take_split(self.0.len());
625 Ok((CompleteStr(i), CompleteStr(o)))
626 }
627 }
628 }
629
split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32> where P: Fn(Self::Item) -> bool,630 fn split_at_position1<P>(&self, predicate: P, e: ErrorKind<u32>) -> IResult<Self, Self, u32>
631 where
632 P: Fn(Self::Item) -> bool,
633 {
634 match self.0.char_indices().find(|&(_, c)| predicate(c)) {
635 Some((0, _)) => Err(Err::Error(Context::Code(CompleteStr(self.0), e))),
636 Some((i, _)) => Ok((CompleteStr(&self.0[i..]), CompleteStr(&self.0[..i]))),
637 None => {
638 if self.0.len() == 0 {
639 Err(Err::Error(Context::Code(CompleteStr(self.0), e)))
640 } else {
641 let (i, o) = self.0.take_split(self.0.len());
642 Ok((CompleteStr(i), CompleteStr(o)))
643 }
644 }
645 }
646 }
647 }
648
/// Indicates whether a comparison was successful, an error, or
/// if more data was needed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareResult {
  /// The input matches the value it was compared to.
  Ok,
  /// The available input matches so far, but is shorter than the value it
  /// was compared to.
  Incomplete,
  /// The input differs from the value it was compared to.
  Error,
}
657
658 /// abstracts comparison operations
659 pub trait Compare<T> {
660 /// compares self to another value for equality
compare(&self, t: T) -> CompareResult661 fn compare(&self, t: T) -> CompareResult;
662 /// compares self to another value for equality
663 /// independently of the case.
664 ///
665 /// warning: for `&str`, the comparison is done
666 /// by lowercasing both strings and comparing
667 /// the result. This is a temporary solution until
668 /// a better one appears
compare_no_case(&self, t: T) -> CompareResult669 fn compare_no_case(&self, t: T) -> CompareResult;
670 }
671
672 impl<'a, 'b> Compare<&'b [u8]> for &'a [u8] {
673 #[inline(always)]
compare(&self, t: &'b [u8]) -> CompareResult674 fn compare(&self, t: &'b [u8]) -> CompareResult {
675 let pos = self.iter().zip(t.iter()).position(|(a, b)| a != b);
676
677 match pos {
678 Some(_) => CompareResult::Error,
679 None => {
680 if self.len() >= t.len() {
681 CompareResult::Ok
682 } else {
683 CompareResult::Incomplete
684 }
685 }
686 }
687
688 /*
689 let len = self.len();
690 let blen = t.len();
691 let m = if len < blen { len } else { blen };
692 let reduced = &self[..m];
693 let b = &t[..m];
694
695 if reduced != b {
696 CompareResult::Error
697 } else if m < blen {
698 CompareResult::Incomplete
699 } else {
700 CompareResult::Ok
701 }
702 */
703 }
704
705 #[inline(always)]
compare_no_case(&self, t: &'b [u8]) -> CompareResult706 fn compare_no_case(&self, t: &'b [u8]) -> CompareResult {
707 let len = self.len();
708 let blen = t.len();
709 let m = if len < blen { len } else { blen };
710 let reduced = &self[..m];
711 let other = &t[..m];
712
713 if !reduced.iter().zip(other).all(|(a, b)| match (*a, *b) {
714 (0...64, 0...64) | (91...96, 91...96) | (123...255, 123...255) => a == b,
715 (65...90, 65...90) | (97...122, 97...122) | (65...90, 97...122) | (97...122, 65...90) => *a | 0b00_10_00_00 == *b | 0b00_10_00_00,
716 _ => false,
717 }) {
718 CompareResult::Error
719 } else if m < blen {
720 CompareResult::Incomplete
721 } else {
722 CompareResult::Ok
723 }
724 }
725 }
726
727 impl<'a, 'b> Compare<&'b str> for &'a [u8] {
728 #[inline(always)]
compare(&self, t: &'b str) -> CompareResult729 fn compare(&self, t: &'b str) -> CompareResult {
730 self.compare(AsBytes::as_bytes(t))
731 }
732 #[inline(always)]
compare_no_case(&self, t: &'b str) -> CompareResult733 fn compare_no_case(&self, t: &'b str) -> CompareResult {
734 self.compare_no_case(AsBytes::as_bytes(t))
735 }
736 }
737
738 impl<'a, 'b> Compare<&'b str> for &'a str {
739 #[inline(always)]
compare(&self, t: &'b str) -> CompareResult740 fn compare(&self, t: &'b str) -> CompareResult {
741 let pos = self.chars().zip(t.chars()).position(|(a, b)| a != b);
742
743 match pos {
744 Some(_) => CompareResult::Error,
745 None => {
746 if self.len() >= t.len() {
747 CompareResult::Ok
748 } else {
749 CompareResult::Incomplete
750 }
751 }
752 }
753 }
754
755 //FIXME: this version is too simple and does not use the current locale
756 #[cfg(feature = "alloc")]
757 #[inline(always)]
compare_no_case(&self, t: &'b str) -> CompareResult758 fn compare_no_case(&self, t: &'b str) -> CompareResult {
759 let pos = self
760 .chars()
761 .zip(t.chars())
762 .position(|(a, b)| a.to_lowercase().zip(b.to_lowercase()).any(|(a, b)| a != b));
763
764 match pos {
765 Some(_) => CompareResult::Error,
766 None => {
767 if self.len() >= t.len() {
768 CompareResult::Ok
769 } else {
770 CompareResult::Incomplete
771 }
772 }
773 }
774 }
775
776 #[cfg(not(feature = "alloc"))]
777 #[inline(always)]
compare_no_case(&self, _: &'b str) -> CompareResult778 fn compare_no_case(&self, _: &'b str) -> CompareResult {
779 unimplemented!()
780 }
781 }
782
/// Looks for a single token inside `self`.
pub trait FindToken<T> {
  /// Returns true when `token` occurs in `self`.
  fn find_token(&self, token: T) -> bool;
}
787
788 impl<'a> FindToken<u8> for &'a [u8] {
find_token(&self, token: u8) -> bool789 fn find_token(&self, token: u8) -> bool {
790 memchr::memchr(token, self).is_some()
791 }
792 }
793
794 impl<'a> FindToken<u8> for &'a str {
find_token(&self, token: u8) -> bool795 fn find_token(&self, token: u8) -> bool {
796 self.as_bytes().find_token(token)
797 }
798 }
799
800 impl<'a, 'b> FindToken<&'a u8> for &'b [u8] {
find_token(&self, token: &u8) -> bool801 fn find_token(&self, token: &u8) -> bool {
802 memchr::memchr(*token, self).is_some()
803 }
804 }
805
806 impl<'a, 'b> FindToken<&'a u8> for &'b str {
find_token(&self, token: &u8) -> bool807 fn find_token(&self, token: &u8) -> bool {
808 self.as_bytes().find_token(token)
809 }
810 }
811
812 impl<'a> FindToken<char> for &'a [u8] {
find_token(&self, token: char) -> bool813 fn find_token(&self, token: char) -> bool {
814 memchr::memchr(token as u8, self).is_some()
815 }
816 }
817
818 impl<'a> FindToken<char> for &'a str {
find_token(&self, token: char) -> bool819 fn find_token(&self, token: char) -> bool {
820 for i in self.chars() {
821 if token == i {
822 return true;
823 }
824 }
825 false
826 }
827 }
828
/// Looks for a substring in self.
pub trait FindSubstring<T> {
  /// Returns the position of the first occurrence of `substr`, if any.
  fn find_substring(&self, substr: T) -> Option<usize>;
}
833
834 impl<'a, 'b> FindSubstring<&'b [u8]> for &'a [u8] {
find_substring(&self, substr: &'b [u8]) -> Option<usize>835 fn find_substring(&self, substr: &'b [u8]) -> Option<usize> {
836 let substr_len = substr.len();
837
838 if substr_len == 0 {
839 // an empty substring is found at position 0
840 // This matches the behavior of str.find("").
841 Some(0)
842 } else if substr_len == 1 {
843 memchr::memchr(substr[0], self)
844 } else if substr_len > self.len() {
845 None
846 } else {
847 let max = self.len() - substr_len;
848 let mut offset = 0;
849 let mut haystack = &self[..];
850
851 while let Some(position) = memchr::memchr(substr[0], haystack) {
852 offset += position;
853
854 if offset > max {
855 return None;
856 }
857
858 if &haystack[position..position + substr_len] == substr {
859 return Some(offset);
860 }
861
862 haystack = &haystack[position + 1..];
863 offset += 1;
864 }
865
866 None
867 }
868 }
869 }
870
871 impl<'a, 'b> FindSubstring<&'b str> for &'a [u8] {
find_substring(&self, substr: &'b str) -> Option<usize>872 fn find_substring(&self, substr: &'b str) -> Option<usize> {
873 self.find_substring(AsBytes::as_bytes(substr))
874 }
875 }
876
877 impl<'a, 'b> FindSubstring<&'b str> for &'a str {
878 //returns byte index
find_substring(&self, substr: &'b str) -> Option<usize>879 fn find_substring(&self, substr: &'b str) -> Option<usize> {
880 self.find(substr)
881 }
882 }
883
/// Used to integrate `str`'s `parse()` method.
pub trait ParseTo<R> {
  /// Parses self into an `R`, or returns `None` when that is not possible.
  fn parse_to(&self) -> Option<R>;
}
888
889 impl<'a, R: FromStr> ParseTo<R> for &'a [u8] {
parse_to(&self) -> Option<R>890 fn parse_to(&self) -> Option<R> {
891 from_utf8(self).ok().and_then(|s| s.parse().ok())
892 }
893 }
894
895 impl<'a, R: FromStr> ParseTo<R> for &'a str {
parse_to(&self) -> Option<R>896 fn parse_to(&self) -> Option<R> {
897 self.parse().ok()
898 }
899 }
900
/// Slicing operations using ranges.
///
/// This trait is loosely based on `Index`, but can actually return
/// something else than a `&[T]` or `&str`.
pub trait Slice<R> {
  /// Returns the part of self selected by `range`.
  // No `#[inline(always)]` here: the attribute has no effect on a method
  // without a body.
  fn slice(&self, range: R) -> Self;
}
910
911 macro_rules! impl_fn_slice {
912 ( $ty:ty ) => {
913 fn slice(&self, range:$ty) -> Self {
914 &self[range]
915 }
916 }
917 }
918
919 macro_rules! slice_range_impl {
920 ( [ $for_type:ident ], $ty:ty ) => {
921 impl<'a, $for_type> Slice<$ty> for &'a [$for_type] {
922 impl_fn_slice!( $ty );
923 }
924 };
925 ( $for_type:ty, $ty:ty ) => {
926 impl<'a> Slice<$ty> for &'a $for_type {
927 impl_fn_slice!( $ty );
928 }
929 }
930 }
931
932 macro_rules! slice_ranges_impl {
933 ( [ $for_type:ident ] ) => {
934 slice_range_impl! {[$for_type], Range<usize>}
935 slice_range_impl! {[$for_type], RangeTo<usize>}
936 slice_range_impl! {[$for_type], RangeFrom<usize>}
937 slice_range_impl! {[$for_type], RangeFull}
938 };
939 ( $for_type:ty ) => {
940 slice_range_impl! {$for_type, Range<usize>}
941 slice_range_impl! {$for_type, RangeTo<usize>}
942 slice_range_impl! {$for_type, RangeFrom<usize>}
943 slice_range_impl! {$for_type, RangeFull}
944 }
945 }
946
947 slice_ranges_impl! {str}
948 slice_ranges_impl! {[T]}
949
/// Indicates whether more data can come later in input.
///
/// When working with complete data, like a file that was entirely loaded
/// in memory, you should use input types like `CompleteByteSlice` and
/// `CompleteStr` to wrap the data. The `at_eof` method of those types
/// always returns true, thus indicating to nom that it should not handle
/// partial data cases.
///
/// When working with partial data, like data coming from the network in
/// buffers, the `at_eof` method can indicate if we expect more data to come,
/// and let nom know that some parsers could still handle more data.
pub trait AtEof {
  /// Returns true when no more data will follow the current input.
  fn at_eof(&self) -> bool;
}
964
need_more<I: AtEof, O, E>(input: I, needed: Needed) -> IResult<I, O, E>965 pub fn need_more<I: AtEof, O, E>(input: I, needed: Needed) -> IResult<I, O, E> {
966 if input.at_eof() {
967 Err(Err::Error(Context::Code(input, ErrorKind::Eof)))
968 } else {
969 Err(Err::Incomplete(needed))
970 }
971 }
972
need_more_err<I: AtEof, O, E>(input: I, needed: Needed, err: ErrorKind<E>) -> IResult<I, O, E>973 pub fn need_more_err<I: AtEof, O, E>(input: I, needed: Needed, err: ErrorKind<E>) -> IResult<I, O, E> {
974 if input.at_eof() {
975 Err(Err::Error(Context::Code(input, err)))
976 } else {
977 Err(Err::Incomplete(needed))
978 }
979 }
980
981 // Tuple for bit parsing
982 impl<I: AtEof, T> AtEof for (I, T) {
at_eof(&self) -> bool983 fn at_eof(&self) -> bool {
984 self.0.at_eof()
985 }
986 }
987
988 impl<'a, T> AtEof for &'a [T] {
at_eof(&self) -> bool989 fn at_eof(&self) -> bool {
990 false
991 }
992 }
993
994 impl<'a> AtEof for &'a str {
at_eof(&self) -> bool995 fn at_eof(&self) -> bool {
996 false
997 }
998 }
999
1000 macro_rules! array_impls {
1001 ($($N:expr)+) => {
1002 $(
1003 impl InputLength for [u8; $N] {
1004 #[inline]
1005 fn input_len(&self) -> usize {
1006 self.len()
1007 }
1008 }
1009
1010 impl<'a> InputLength for &'a [u8; $N] {
1011 #[inline]
1012 fn input_len(&self) -> usize {
1013 self.len()
1014 }
1015 }
1016
1017 impl<'a> Compare<[u8; $N]> for &'a [u8] {
1018 #[inline(always)]
1019 fn compare(&self, t: [u8; $N]) -> CompareResult {
1020 self.compare(&t[..])
1021 }
1022
1023 #[inline(always)]
1024 fn compare_no_case(&self, t: [u8;$N]) -> CompareResult {
1025 self.compare_no_case(&t[..])
1026 }
1027 }
1028
1029 impl<'a,'b> Compare<&'b [u8; $N]> for &'a [u8] {
1030 #[inline(always)]
1031 fn compare(&self, t: &'b [u8; $N]) -> CompareResult {
1032 self.compare(&t[..])
1033 }
1034
1035 #[inline(always)]
1036 fn compare_no_case(&self, t: &'b [u8;$N]) -> CompareResult {
1037 self.compare_no_case(&t[..])
1038 }
1039 }
1040
1041 impl FindToken<u8> for [u8; $N] {
1042 fn find_token(&self, token: u8) -> bool {
1043 memchr::memchr(token, &self[..]).is_some()
1044 }
1045 }
1046
1047 impl<'a> FindToken<&'a u8> for [u8; $N] {
1048 fn find_token(&self, token: &u8) -> bool {
1049 memchr::memchr(*token, &self[..]).is_some()
1050 }
1051 }
1052 )+
1053 };
1054 }
1055
1056 array_impls! {
1057 0 1 2 3 4 5 6 7 8 9
1058 10 11 12 13 14 15 16 17 18 19
1059 20 21 22 23 24 25 26 27 28 29
1060 30 31 32
1061 }
1062
/// Abstracts something which can extend an `Extend`.
// Neither method carries `#[inline]`: the attribute has no effect on a trait
// method without a body.
pub trait ExtendInto {
  /// Element type accepted by the accumulator.
  type Item;
  /// Accumulator type that the input is appended to.
  type Extender: Extend<Self::Item>;

  /// Creates a new `Extend` of the correct type.
  fn new_builder(&self) -> Self::Extender;

  /// Accumulates the input into an accumulator.
  fn extend_into(&self, acc: &mut Self::Extender);
}
1075
/// Byte slices accumulate into a `Vec<u8>`.
#[cfg(feature = "alloc")]
impl ExtendInto for [u8] {
  type Item = u8;
  type Extender = Vec<u8>;

  /// Starts from an empty vector.
  #[inline]
  fn new_builder(&self) -> Vec<u8> {
    Vec::new()
  }
  /// Appends a copy of every byte of self.
  #[inline]
  fn extend_into(&self, acc: &mut Vec<u8>) {
    acc.extend_from_slice(self);
  }
}

/// String slices accumulate into a `String`.
#[cfg(feature = "alloc")]
impl ExtendInto for str {
  type Item = char;
  type Extender = String;

  /// Starts from an empty string.
  #[inline]
  fn new_builder(&self) -> String {
    String::new()
  }
  /// Appends the text of self.
  #[inline]
  fn extend_into(&self, acc: &mut String) {
    *acc += self;
  }
}

/// Single characters accumulate into a `String`.
#[cfg(feature = "alloc")]
impl ExtendInto for char {
  type Item = char;
  type Extender = String;

  /// Starts from an empty string.
  #[inline]
  fn new_builder(&self) -> String {
    String::new()
  }
  /// Appends the character.
  #[inline]
  fn extend_into(&self, acc: &mut String) {
    let ch = *self;
    acc.push(ch);
  }
}
1120
#[cfg(test)]
mod tests {
  use super::*;

  /// Offsets of sub-slices are measured from the start of the parent slice.
  #[test]
  fn test_offset_u8() {
    let s = b"abcd123";
    let whole = &s[..];
    let cases = [(&whole[2..], 2), (&whole[..4], 0), (&whole[3..5], 3)];

    for &(part, expected) in cases.iter() {
      assert_eq!(whole.offset(part), expected);
    }
  }

  /// Offsets into a string are byte offsets, also across multi-byte characters.
  #[test]
  fn test_offset_str() {
    let s = "abcřèÂßÇd123";
    let whole = &s[..];
    let cases = [(&whole[7..], 7), (&whole[..5], 0), (&whole[5..9], 5)];

    for &(part, expected) in cases.iter() {
      assert_eq!(whole.offset(part), expected);
    }
  }
}
1149