1 //! Adapted from [`nom`](https://github.com/Geal/nom).
2
3 use std::str::{Bytes, CharIndices, Chars};
4
5 use unicode_xid::UnicodeXID;
6
7 use fallback::LexError;
8
/// Position within the text being lexed, represented by the part of the
/// input that has not been consumed yet.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    /// Input that is still to be processed.
    pub rest: &'a str,
    /// Offset counter that `advance` increases by the number of bytes it
    /// skips. Only compiled in when the `span_locations` cfg is set.
    #[cfg(span_locations)]
    pub off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor that starts `amt` bytes further into the input.
    ///
    /// # Panics
    ///
    /// Panics if `amt` is past the end of the remaining input or does not
    /// fall on a character boundary.
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        let tail = &self.rest[amt..];
        Cursor {
            rest: tail,
            #[cfg(span_locations)]
            off: self.off + (amt as u32),
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining input.
    pub fn find(&self, p: char) -> Option<usize> {
        let haystack = self.rest;
        haystack.find(p)
    }

    /// Whether the remaining input begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        let haystack = self.rest;
        haystack.starts_with(s)
    }

    /// Whether all input has been consumed.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Number of bytes left in the input.
    pub fn len(&self) -> usize {
        let remaining = self.rest;
        remaining.len()
    }

    /// The remaining input as raw bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        let remaining = self.rest;
        remaining.as_bytes()
    }

    /// Iterator over the bytes of the remaining input.
    pub fn bytes(&self) -> Bytes<'a> {
        let remaining = self.rest;
        remaining.bytes()
    }

    /// Iterator over the characters of the remaining input.
    pub fn chars(&self) -> Chars<'a> {
        let remaining = self.rest;
        remaining.chars()
    }

    /// Iterator over the characters of the remaining input paired with
    /// their byte offsets.
    pub fn char_indices(&self) -> CharIndices<'a> {
        let remaining = self.rest;
        remaining.char_indices()
    }
}
63
/// Outcome of running a parser: on success, the cursor for the remaining
/// input paired with the parsed output `O`; on failure, a `LexError`.
pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
65
whitespace(input: Cursor) -> PResult<()>66 pub fn whitespace(input: Cursor) -> PResult<()> {
67 if input.is_empty() {
68 return Err(LexError);
69 }
70
71 let bytes = input.as_bytes();
72 let mut i = 0;
73 while i < bytes.len() {
74 let s = input.advance(i);
75 if bytes[i] == b'/' {
76 if s.starts_with("//")
77 && (!s.starts_with("///") || s.starts_with("////"))
78 && !s.starts_with("//!")
79 {
80 if let Some(len) = s.find('\n') {
81 i += len + 1;
82 continue;
83 }
84 break;
85 } else if s.starts_with("/**/") {
86 i += 4;
87 continue;
88 } else if s.starts_with("/*")
89 && (!s.starts_with("/**") || s.starts_with("/***"))
90 && !s.starts_with("/*!")
91 {
92 let (_, com) = block_comment(s)?;
93 i += com.len();
94 continue;
95 }
96 }
97 match bytes[i] {
98 b' ' | 0x09...0x0d => {
99 i += 1;
100 continue;
101 }
102 b if b <= 0x7f => {}
103 _ => {
104 let ch = s.chars().next().unwrap();
105 if is_whitespace(ch) {
106 i += ch.len_utf8();
107 continue;
108 }
109 }
110 }
111 return if i > 0 { Ok((s, ())) } else { Err(LexError) };
112 }
113 Ok((input.advance(input.len()), ()))
114 }
115
block_comment(input: Cursor) -> PResult<&str>116 pub fn block_comment(input: Cursor) -> PResult<&str> {
117 if !input.starts_with("/*") {
118 return Err(LexError);
119 }
120
121 let mut depth = 0;
122 let bytes = input.as_bytes();
123 let mut i = 0;
124 let upper = bytes.len() - 1;
125 while i < upper {
126 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
127 depth += 1;
128 i += 1; // eat '*'
129 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
130 depth -= 1;
131 if depth == 0 {
132 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
133 }
134 i += 1; // eat '/'
135 }
136 i += 1;
137 }
138 Err(LexError)
139 }
140
skip_whitespace(input: Cursor) -> Cursor141 pub fn skip_whitespace(input: Cursor) -> Cursor {
142 match whitespace(input) {
143 Ok((rest, _)) => rest,
144 Err(LexError) => input,
145 }
146 }
147
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        other => other.is_whitespace(),
    }
}
152
word_break(input: Cursor) -> PResult<()>153 pub fn word_break(input: Cursor) -> PResult<()> {
154 match input.chars().next() {
155 Some(ch) if UnicodeXID::is_xid_continue(ch) => Err(LexError),
156 Some(_) | None => Ok((input, ())),
157 }
158 }
159
/// Declares a parser function `$parser` that takes a `Cursor` and returns
/// `PResult<$out>`, with the given combinator invocation as its body.
macro_rules! named {
    ($parser:ident -> $out:ty, $inner:ident!( $($params:tt)* )) => {
        fn $parser<'a>(input: Cursor<'a>) -> $crate::strnom::PResult<'a, $out> {
            $inner!(input, $($params)*)
        }
    };
}
167
/// Tries each `|`-separated alternative on the same input and yields the
/// first success.
///
/// An alternative is either a bare function name (shorthand for
/// `call!(name)`) or a combinator invocation, optionally followed by
/// `=> { f }` to pass the parsed output through `f`.
macro_rules! alt {
    // Bare function name, more alternatives follow.
    ($input:expr, $func:ident | $($others:tt)*) => {
        alt!($input, call!($func) | $($others)*)
    };

    // Combinator, more alternatives follow.
    ($input:expr, $parser:ident!( $($params:tt)*) | $($others:tt)*) => {
        match $parser!($input, $($params)*) {
            found @ Ok(_) => found,
            _ => alt!($input, $($others)*)
        }
    };

    // Combinator with output mapping, more alternatives follow.
    ($input:expr, $parser:ident!( $($params:tt)* ) => { $convert:expr } | $($others:tt)+) => {
        match $parser!($input, $($params)*) {
            Err(LexError) => alt!($input, $($others)*),
            Ok((remaining, value)) => Ok((remaining, $convert(value))),
        }
    };

    // Bare function name with output mapping, more alternatives follow.
    ($input:expr, $func:ident => { $convert:expr } | $($others:tt)*) => {
        alt!($input, call!($func) => { $convert } | $($others)*)
    };

    // Last alternative: bare function name with output mapping.
    ($input:expr, $func:ident => { $convert:expr }) => {
        alt!($input, call!($func) => { $convert })
    };

    // Last alternative: combinator with output mapping.
    ($input:expr, $parser:ident!( $($params:tt)* ) => { $convert:expr }) => {
        match $parser!($input, $($params)*) {
            Err(LexError) => Err(LexError),
            Ok((remaining, value)) => Ok((remaining, $convert(value))),
        }
    };

    // Last alternative: bare function name.
    ($input:expr, $func:ident) => {
        alt!($input, call!($func))
    };

    // Last alternative: combinator.
    ($input:expr, $parser:ident!( $($params:tt)*)) => {
        $parser!($input, $($params)*)
    };
}
210
/// Runs a `>>`-separated sequence of parsers, feeding each one the input
/// left over by the previous one and stopping at the first failure.
///
/// A step written as `name: parser` binds that parser's output to `name`.
/// The trailing parenthesised expression list becomes the output.
macro_rules! do_parse {
    // End of the sequence: produce the output expression(s).
    ($input:expr, ( $($output:expr),* )) => {
        Ok(($input, ( $($output),* )))
    };

    // Unbound step given as a bare function name.
    ($input:expr, $func:ident >> $($steps:tt)*) => {
        do_parse!($input, call!($func) >> $($steps)*)
    };

    // Unbound step given as a combinator; its output is discarded.
    ($input:expr, $parser:ident!( $($params:tt)* ) >> $($steps:tt)*) => {
        match $parser!($input, $($params)*) {
            Ok((remaining, _)) => do_parse!(remaining, $($steps)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Bound step given as a bare function name.
    ($input:expr, $binding:ident : $func:ident >> $($steps:tt)*) => {
        do_parse!($input, $binding: call!($func) >> $($steps)*)
    };

    // Bound step given as a combinator.
    ($input:expr, $binding:ident : $parser:ident!( $($params:tt)* ) >> $($steps:tt)*) => {
        match $parser!($input, $($params)*) {
            Ok((remaining, value)) => {
                let $binding = value;
                do_parse!(remaining, $($steps)*)
            },
            Err(LexError) => Err(LexError),
        }
    };
}
241
/// Runs a combinator but, on success, yields the original input instead of
/// the remaining input, so nothing is consumed.
macro_rules! peek {
    ($input:expr, $parser:ident!( $($params:tt)* )) => {
        match $parser!($input, $($params)*) {
            Err(LexError) => Err(LexError),
            Ok((_, value)) => Ok(($input, value)),
        }
    };
}
250
/// Invokes `$callee` with the input as its first argument, followed by any
/// extra arguments.
macro_rules! call {
    ($input:expr, $callee:expr $(, $extra:expr)*) => {
        $callee($input $(, $extra)*)
    };
}
256
/// Makes a parser optional: a success is wrapped in `Some`, a failure
/// becomes `None` with the input left untouched.
macro_rules! option {
    ($input:expr, $parser:expr) => {
        match $parser($input) {
            Err(LexError) => Ok(($input, None)),
            Ok((remaining, value)) => Ok((remaining, Some(value))),
        }
    };
}
265
/// Consumes input up to, but not including, the next `'\n'`, or up to the
/// end of input when there is no newline. Always succeeds and yields the
/// consumed text.
macro_rules! take_until_newline_or_eof {
    ($input:expr,) => {{
        if $input.len() == 0 {
            Ok(($input, ""))
        } else {
            let stop = match $input.find('\n') {
                Some(newline) => newline,
                None => $input.len(),
            };
            Ok(($input.advance(stop), &$input.rest[..stop]))
        }
    }};
}
278
/// Runs the comma-separated parsers in sequence and collects their outputs
/// into a tuple. Delegates to `tuple_parser!` with an empty accumulator.
macro_rules! tuple {
    ($input:expr, $($parsers:tt)*) => {
        tuple_parser!($input, (), $($parsers)*)
    };
}
284
/// Do not use directly. Use `tuple!`.
///
/// Arguments are the current input, the parenthesised list of outputs
/// collected so far, and the parsers that still have to run.
macro_rules! tuple_parser {
    // Next parser is a bare function name and more parsers follow.
    ($input:expr, ($($done:tt),*), $func:ident, $($pending:tt)*) => {
        tuple_parser!($input, ($($done),*), call!($func), $($pending)*)
    };

    // First parser of the sequence, more parsers follow.
    ($input:expr, (), $parser:ident!( $($params:tt)* ), $($pending:tt)*) => {
        match $parser!($input, $($params)*) {
            Ok((remaining, value)) => tuple_parser!(remaining, (value), $($pending)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Later parser of the sequence, more parsers follow.
    ($input:expr, ($($done:tt)*), $parser:ident!( $($params:tt)* ), $($pending:tt)*) => {
        match $parser!($input, $($params)*) {
            Ok((remaining, value)) => tuple_parser!(remaining, ($($done)* , value), $($pending)*),
            Err(LexError) => Err(LexError),
        }
    };

    // Final parser given as a bare function name.
    ($input:expr, ($($done:tt),*), $func:ident) => {
        tuple_parser!($input, ($($done),*), call!($func))
    };

    // The sequence consists of a single parser: no tuple is built.
    ($input:expr, (), $parser:ident!( $($params:tt)* )) => {
        $parser!($input, $($params)*)
    };

    // Final parser: append its output and build the tuple.
    ($input:expr, ($($done:expr),*), $parser:ident!( $($params:tt)* )) => {
        match $parser!($input, $($params)*) {
            Ok((remaining, value)) => Ok((remaining, ($($done),*, value))),
            Err(LexError) => Err(LexError),
        }
    };

    // Nothing left to run: yield what has been collected.
    ($input:expr, ($($done:expr),*)) => {
        Ok(($input, ($($done),*)))
    };
}
324
/// Inverts a combinator: succeeds, consuming nothing, exactly when the
/// wrapped combinator fails.
macro_rules! not {
    ($input:expr, $parser:ident!( $($params:tt)* )) => {
        match $parser!($input, $($params)*) {
            Err(LexError) => Ok(($input, ())),
            Ok((_, _)) => Err(LexError),
        }
    };
}
333
/// Matches the literal `$literal` at the start of the input, yielding the
/// matched slice of the input; fails if the input does not start with it.
macro_rules! tag {
    ($input:expr, $literal:expr) => {
        match $input.starts_with($literal) {
            true => Ok(($input.advance($literal.len()), &$input.rest[..$literal.len()])),
            false => Err(LexError),
        }
    };
}
343
/// Matches the punctuation token `$token`; forwards to the `punct` function
/// of this module.
macro_rules! punct {
    ($input:expr, $token:expr) => {
        $crate::strnom::punct($input, $token)
    };
}
349
350 /// Do not use directly. Use `punct!`.
punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str>351 pub fn punct<'a>(input: Cursor<'a>, token: &'static str) -> PResult<'a, &'a str> {
352 let input = skip_whitespace(input);
353 if input.starts_with(token) {
354 Ok((input.advance(token.len()), token))
355 } else {
356 Err(LexError)
357 }
358 }
359
/// Runs two parsers in sequence and keeps only the output of the second.
/// The second parser may be a combinator invocation or a function
/// expression.
macro_rules! preceded {
    ($input:expr, $prefix:ident!( $($prefix_args:tt)* ), $body:ident!( $($body_args:tt)* )) => {
        match tuple!($input, $prefix!($($prefix_args)*), $body!($($body_args)*)) {
            Err(LexError) => Err(LexError),
            Ok((rest, (_, value))) => Ok((rest, value)),
        }
    };

    ($input:expr, $prefix:ident!( $($prefix_args:tt)* ), $body_fn:expr) => {
        preceded!($input, $prefix!($($prefix_args)*), call!($body_fn))
    };
}
372
/// Runs three parsers in sequence and keeps only the output of the middle
/// one.
macro_rules! delimited {
    ($input:expr, $open:ident!( $($open_args:tt)* ), $($following:tt)+) => {
        match tuple_parser!($input, (), $open!($($open_args)*), $($following)*) {
            Ok((rest, (_, inner, _))) => Ok((rest, inner)),
            Err(LexError) => Err(LexError),
        }
    };
}
381
/// Runs a parser and passes its output through `$transform`. The parser
/// may be a combinator invocation or a function expression.
macro_rules! map {
    ($input:expr, $parser:ident!( $($params:tt)* ), $transform:expr) => {
        match $parser!($input, $($params)*) {
            Ok((remaining, value)) => Ok((remaining, call!(value, $transform))),
            Err(LexError) => Err(LexError),
        }
    };

    ($input:expr, $parser_fn:expr, $transform:expr) => {
        map!($input, call!($parser_fn), $transform)
    };
}
394