1 // (C) Copyright 2016 Jethro G. Beekman
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 //! Parsing C literals from byte slices.
9 //!
10 //! This will parse a representation of a C literal into a Rust type.
11 //!
12 //! # characters
13 //! Character literals are stored into the `CChar` type, which can hold values
14 //! that are not valid Unicode code points. ASCII characters are represented as
//! `char`, literal bytes with the high bit set are converted into the raw
16 //! representation. Escape sequences are supported. If hex and octal escapes
17 //! map to an ASCII character, that is used, otherwise, the raw encoding is
18 //! used, including for values over 255. Unicode escapes are checked for
19 //! validity and mapped to `char`. Character sequences are not supported. Width
20 //! prefixes are ignored.
21 //!
22 //! # strings
23 //! Strings are interpreted as byte vectors. Escape sequences are supported. If
24 //! hex and octal escapes map onto multi-byte characters, they are truncated to
25 //! one 8-bit character. Unicode escapes are converted into their UTF-8
26 //! encoding. Width prefixes are ignored.
27 //!
28 //! # integers
29 //! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are
30 //! all supported. If the literal value is between `i64::MAX` and `u64::MAX`,
31 //! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and
32 //! sign suffixes are ignored. Sign prefixes are not supported.
33 //!
34 //! # real numbers
35 //! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are
36 //! not supported in the significand. Hexadecimal floating points are not
37 //! supported.
38
39 use std::char;
40 use std::str::{self,FromStr};
41
42 use nom_crate::*;
43
44 use expr::EvalResult;
45
/// Representation of a C character
///
/// A C character literal can denote values that are not valid Unicode scalar
/// values, so the value is either a proper `char` or a raw integer.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum CChar {
    /// A character that can be represented as a `char`
    Char(char),
    /// Any other character (8-bit characters, unicode surrogates, etc.)
    Raw(u64),
}

impl From<u8> for CChar {
    /// Converts a single byte: ASCII bytes (`0x00..=0x7f`) become
    /// `CChar::Char`, every other byte is kept as `CChar::Raw`.
    fn from(i: u8) -> CChar {
        if i.is_ascii() {
            CChar::Char(i as char)
        } else {
            CChar::Raw(u64::from(i))
        }
    }
}

// A non-allocating version of this would be nice...
//
// Implemented as `From` rather than `Into`: the standard blanket impl then
// provides `Into<Vec<u8>> for CChar`, so existing `c.into()` callers keep
// working and `Vec::from(c)` becomes available as well.
impl From<CChar> for Vec<u8> {
    /// Converts the character into the bytes it contributes to a C string:
    /// the UTF-8 encoding for `CChar::Char`, a single byte for `CChar::Raw`.
    fn from(c: CChar) -> Vec<u8> {
        match c {
            CChar::Char(ch) => {
                // A `char` encodes to at most four UTF-8 bytes.
                let mut buf = [0u8; 4];
                ch.encode_utf8(&mut buf).as_bytes().to_vec()
            }
            // Deliberate truncation: raw values wider than 8 bits keep only
            // their low byte.
            CChar::Raw(i) => vec![i as u8],
        }
    }
}
81
/// Ensures the child parser consumes the whole input.
///
/// Runs the wrapped parser (either a `submacro!(args...)` invocation or a
/// plain function name) on the input. A successful result is passed through
/// only when the remaining input is empty; otherwise it is turned into an
/// `Err::Error` carrying `ErrorKind::Custom(42)` positioned at the leftover
/// input. Any non-`Ok` result of the child parser is returned unchanged.
#[macro_export]
macro_rules! full (
    ($i: expr, $submac:ident!( $($args:tt)* )) => (
        {
            use ::nom_crate::lib::std::result::Result::*;
            let res = $submac!($i, $($args)*);
            match res {
                Ok((i, o)) => if i.len() == 0 {
                    Ok((i, o))
                } else {
                    Err(::nom_crate::Err::Error(error_position!(i, ::nom_crate::ErrorKind::Custom(42))))
                },
                r => r,
            }
        }
    );
    // No trailing `;` after the inner invocation: this arm is expanded in
    // expression position, where a trailing semicolon is rejected by the
    // `semicolon_in_expressions_from_macros` lint.
    ($i:expr, $f:ident) => (
        full!($i, call!($f))
    );
);
103
104 // ====================================================
105 // ======== macros that shouldn't be necessary ========
106 // ====================================================
107
// Expands to an always-failing parser result (`ErrorKind::Fix` at the given
// input) whose input, output and error types are spelled out explicitly via
// the `IResult<I, O, E>` argument.
macro_rules! force_type {
    ($input:expr,IResult<$i:ty,$o:ty,$e:ty>) => {
        Err::<($i, $o), Err<$i, $e>>(
            ::nom_crate::Err::Error(error_position!($input, ErrorKind::Fix))
        )
    };
}
111
112
113 // =================================
114 // ======== matching digits ========
115 // =================================
116
117 macro_rules! byte (
118 ($i:expr, $($p: pat)|* ) => ({
119 match $i.split_first() {
120 $(Some((&c @ $p,rest)))|* => Ok::<(&[_],u8),::nom_crate::Err<&[_],u32>>((rest,c)),
121 Some(_) => Err(::nom_crate::Err::Error(error_position!($i, ErrorKind::OneOf))),
122 None => Err(::nom_crate::Err::Incomplete(Needed::Size(1))),
123 }
124 })
125 );
126
127 named!(binary<u8>,byte!(b'0' ... b'1'));
128 named!(octal<u8>,byte!(b'0' ... b'7'));
129 named!(decimal<u8>,byte!(b'0' ... b'9'));
130 named!(hexadecimal<u8>,byte!(b'0' ... b'9' | b'a' ... b'f' | b'A' ... b'F'));
131
132
133 // ========================================
134 // ======== characters and strings ========
135 // ========================================
136
escape2char(c: char) -> CChar137 fn escape2char(c: char) -> CChar {
138 CChar::Char(match c {
139 'a' => '\x07',
140 'b' => '\x08',
141 'f' => '\x0c',
142 'n' => '\n',
143 'r' => '\r',
144 't' => '\t',
145 'v' => '\x0b',
146 _ => unreachable!("invalid escape {}",c)
147 })
148 }
149
c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar>150 fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> {
151 str::from_utf8(&n).ok()
152 .and_then(|i|u64::from_str_radix(i,radix).ok())
153 .map(|i|match i {
154 0 ... 0x7f => CChar::Char(i as u8 as char),
155 _ => CChar::Raw(i),
156 })
157 }
158
c_unicode_escape(n: Vec<u8>) -> Option<CChar>159 fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {
160 str::from_utf8(&n).ok()
161 .and_then(|i|u32::from_str_radix(i,16).ok())
162 .and_then(char::from_u32)
163 .map(CChar::Char)
164 }
165
166 named!(escaped_char<CChar>,
167 preceded!(complete!(char!('\\')),alt_complete!(
168 map!(one_of!(r#"'"?\"#),CChar::Char) |
169 map!(one_of!("abfnrtv"),escape2char) |
170 map_opt!(many_m_n!(1,3,octal),|v|c_raw_escape(v,8)) |
171 map_opt!(preceded!(char!('x'),many1!(hexadecimal)),|v|c_raw_escape(v,16)) |
172 map_opt!(preceded!(char!('u'),many_m_n!(4,4,hexadecimal)),c_unicode_escape) |
173 map_opt!(preceded!(char!('U'),many_m_n!(8,8,hexadecimal)),c_unicode_escape)
174 ))
175 );
176
177 named!(c_width_prefix,
178 alt!(
179 tag!("u8") |
180 tag!("u") |
181 tag!("U") |
182 tag!("L")
183 )
184 );
185
186 named!(c_char<CChar>,
187 delimited!(
188 terminated!(opt!(c_width_prefix),char!('\'')),
189 alt!( escaped_char | map!(byte!(0 ... 91 /* \=92 */ | 93 ... 255),CChar::from) ),
190 char!('\'')
191 )
192 );
193
194 named!(c_string<Vec<u8> >,
195 delimited!(
196 alt!( preceded!(c_width_prefix,char!('"')) | char!('"') ),
197 fold_many0!(
198 alt!(map!(escaped_char, |c:CChar| c.into()) | map!(is_not!([b'\\', b'"']), |c: &[u8]| c.into())),
199 Vec::new(),
200 |mut v: Vec<u8>, res:Vec<u8>| { v.extend_from_slice(&res); v }
201 ),
202 char!('"')
203 )
204 );
205
206 // ================================
207 // ======== parse integers ========
208 // ================================
209
/// Parses the digit bytes `n` as an unsigned integer written in base `radix`.
///
/// Returns `None` when the bytes are not valid UTF-8 or when they do not
/// form a `u64` in that radix (empty input, invalid digit, or overflow).
fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
    match str::from_utf8(&n) {
        Ok(digits) => u64::from_str_radix(digits, radix).ok(),
        Err(_) => None,
    }
}
214
take_ul(input: &[u8]) -> IResult<&[u8], &[u8]>215 fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> {
216 use ::nom_crate::InputTakeAtPosition;
217
218 let r = input.split_at_position(|c| c != b'u' && c != b'U' && c != b'l' && c != b'L');
219 match r {
220 Err(Err::Incomplete(_)) => Ok((&input[input.len()..], input)),
221 res => res,
222 }
223 }
224
225 named!(c_int<i64>,
226 map!(terminated!(alt_complete!(
227 map_opt!(preceded!(tag!("0x"),many1!(complete!(hexadecimal))),|v|c_int_radix(v,16)) |
228 map_opt!(preceded!(tag!("0X"),many1!(complete!(hexadecimal))),|v|c_int_radix(v,16)) |
229 map_opt!(preceded!(tag!("0b"),many1!(complete!(binary))),|v|c_int_radix(v,2)) |
230 map_opt!(preceded!(tag!("0B"),many1!(complete!(binary))),|v|c_int_radix(v,2)) |
231 map_opt!(preceded!(char!('0'),many1!(complete!(octal))),|v|c_int_radix(v,8)) |
232 map_opt!(many1!(complete!(decimal)),|v|c_int_radix(v,10)) |
233 force_type!(IResult<_,_,u32>)
234 ),opt!(take_ul)),|i|i as i64)
235 );
236
237 // ==============================
238 // ======== parse floats ========
239 // ==============================
240
241 named!(float_width<u8>,complete!(byte!(b'f' | b'l' | b'F' | b'L')));
242 named!(float_exp<(Option<u8>,Vec<u8>)>,preceded!(byte!(b'e'|b'E'),pair!(opt!(byte!(b'-'|b'+')),many1!(complete!(decimal)))));
243
244 named!(c_float<f64>,
245 map_opt!(alt!(
246 terminated!(recognize!(tuple!(many1!(complete!(decimal)),byte!(b'.'),many0!(complete!(decimal)))),opt!(float_width)) |
247 terminated!(recognize!(tuple!(many0!(complete!(decimal)),byte!(b'.'),many1!(complete!(decimal)))),opt!(float_width)) |
248 terminated!(recognize!(tuple!(many0!(complete!(decimal)),opt!(byte!(b'.')),many1!(complete!(decimal)),float_exp)),opt!(float_width)) |
249 terminated!(recognize!(tuple!(many1!(complete!(decimal)),opt!(byte!(b'.')),many0!(complete!(decimal)),float_exp)),opt!(float_width)) |
250 terminated!(recognize!(many1!(complete!(decimal))),float_width)
251 ),|v|str::from_utf8(v).ok().and_then(|i|f64::from_str(i).ok()))
252 );
253
254 // ================================
255 // ======== main interface ========
256 // ================================
257
258 named!(one_literal<&[u8],EvalResult,::Error>,
259 fix_error!(::Error,alt_complete!(
260 map!(full!(c_char),EvalResult::Char) |
261 map!(full!(c_int),|i|EvalResult::Int(::std::num::Wrapping(i))) |
262 map!(full!(c_float),EvalResult::Float) |
263 map!(full!(c_string),EvalResult::Str)
264 ))
265 );
266
267 /// Parse a C literal.
268 ///
269 /// The input must contain exactly the representation of a single literal
270 /// token, and in particular no whitespace or sign prefixes.
parse(input: &[u8]) -> IResult<&[u8],EvalResult,::Error>271 pub fn parse(input: &[u8]) -> IResult<&[u8],EvalResult,::Error> {
272 ::assert_full_parse(one_literal(input))
273 }
274