1 // (C) Copyright 2016 Jethro G. Beekman
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 //! Parsing C literals from byte slices.
9 //!
10 //! This will parse a representation of a C literal into a Rust type.
11 //!
12 //! # characters
13 //! Character literals are stored into the `CChar` type, which can hold values
14 //! that are not valid Unicode code points. ASCII characters are represented as
//! `char`, literal bytes with the high bit set are converted into the raw
16 //! representation. Escape sequences are supported. If hex and octal escapes
17 //! map to an ASCII character, that is used, otherwise, the raw encoding is
18 //! used, including for values over 255. Unicode escapes are checked for
19 //! validity and mapped to `char`. Character sequences are not supported. Width
20 //! prefixes are ignored.
21 //!
22 //! # strings
23 //! Strings are interpreted as byte vectors. Escape sequences are supported. If
24 //! hex and octal escapes map onto multi-byte characters, they are truncated to
25 //! one 8-bit character. Unicode escapes are converted into their UTF-8
26 //! encoding. Width prefixes are ignored.
27 //!
28 //! # integers
29 //! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are
30 //! all supported. If the literal value is between `i64::MAX` and `u64::MAX`,
31 //! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and
32 //! sign suffixes are ignored. Sign prefixes are not supported.
33 //!
34 //! # real numbers
35 //! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are
36 //! not supported in the significand. Hexadecimal floating points are not
37 //! supported.
38 
39 use std::char;
40 use std::str::{self,FromStr};
41 
42 use nom_crate::*;
43 
44 use expr::EvalResult;
45 
/// The value of a single C character literal.
///
/// C character literals can denote values that are not valid Rust `char`s,
/// so a value is either a proper `char` or a raw integer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CChar {
	/// A value that can be represented as a Rust `char`
	Char(char),
	/// Any other value (8-bit characters, unicode surrogates, etc.), kept
	/// as its raw number
	Raw(u64),
}
54 
55 impl From<u8> for CChar {
from(i: u8) -> CChar56 	fn from(i: u8) -> CChar {
57 		match i {
58 			0 ... 0x7f => CChar::Char(i as u8 as char),
59 			_ => CChar::Raw(i as u64),
60 		}
61 	}
62 }
63 
64 // A non-allocating version of this would be nice...
65 impl Into<Vec<u8>> for CChar {
into(self) -> Vec<u8>66 	fn into(self) -> Vec<u8> {
67 		match self {
68 			CChar::Char(c) => {
69 				let mut s=String::with_capacity(4);
70 				s.extend(&[c]);
71 				s.into_bytes()
72 			}
73 			CChar::Raw(i) => {
74 				let mut v=Vec::with_capacity(1);
75 				v.push(i as u8);
76 				v
77 			}
78 		}
79 	}
80 }
81 
/// Ensures the child parser consumes the whole input.
///
/// The child parser is run on `$i`. A successful result is passed through
/// only if no input remains; otherwise it is replaced by `Err::Error` with
/// `ErrorKind::Custom(42)`, positioned at the leftover input. Any other
/// result of the child parser is returned unchanged.
///
/// Two forms are accepted: `full!(input, submacro!(args...))` and
/// `full!(input, function)`.
#[macro_export]
macro_rules! full (
	($i: expr, $submac:ident!( $($args:tt)* )) => (
		{
			use ::nom_crate::lib::std::result::Result::*;
			let res =  $submac!($i, $($args)*);
			match res {
				Ok((i, o)) => if i.len() == 0 {
					Ok((i, o))
				} else {
					Err(::nom_crate::Err::Error(error_position!(i, ::nom_crate::ErrorKind::Custom(42))))
				},
				r => r,
			}
		}
	);
	($i:expr, $f:ident) => (
		// No trailing `;` here: this arm is expanded in expression position,
		// and a semicolon after the expression is flagged by the compiler
		// (`semicolon_in_expressions_from_macros`).
		full!($i, call!($f))
	);
);
103 
104 // ====================================================
105 // ======== macros that shouldn't be necessary ========
106 // ====================================================
107 
// Expands to a result that is always an error (`Err::Error` carrying
// `ErrorKind::Fix`), with the input, output and error types spelled out via
// the `IResult<$i,$o,$e>` argument. `c_int` lists it as the last alternative
// of its `alt_complete!` with `IResult<_,_,u32>`; presumably this only exists
// to pin the error type for type inference -- it never matches any input.
macro_rules! force_type (
	($input:expr,IResult<$i:ty,$o:ty,$e:ty>) => (Err::<($i,$o),Err<$i,$e>>(::nom_crate::Err::Error(error_position!($input, ErrorKind::Fix))))
);
111 
112 
113 // =================================
114 // ======== matching digits ========
115 // =================================
116 
// Matches the first byte of the input against one or more `|`-separated
// patterns.
//
// On a match the result is the remaining slice plus the matched byte. A
// first byte matching none of the patterns gives `Err::Error` with
// `ErrorKind::OneOf`; empty input gives `Err::Incomplete(Needed::Size(1))`.
macro_rules! byte (
	($i:expr, $($p: pat)|* ) => ({
		match $i.split_first() {
			// expands to `Some(..) | Some(..) | ...`, one alternative per
			// pattern, each binding the matched byte to `c`
			$(Some((&c @ $p,rest)))|* => Ok::<(&[_],u8),::nom_crate::Err<&[_],u32>>((rest,c)),
			Some(_) => Err(::nom_crate::Err::Error(error_position!($i, ErrorKind::OneOf))),
			None => Err(::nom_crate::Err::Incomplete(Needed::Size(1))),
		}
	})
);
126 
// Single-digit parsers built on `byte!`, one per radix. Each yields the
// matched ASCII byte itself (the digit character, not its numeric value).
named!(binary<u8>,byte!(b'0' ... b'1'));
named!(octal<u8>,byte!(b'0' ... b'7'));
named!(decimal<u8>,byte!(b'0' ... b'9'));
// hexadecimal digits are accepted in either case
named!(hexadecimal<u8>,byte!(b'0' ... b'9' | b'a' ... b'f' | b'A' ... b'F'));
131 
132 
133 // ========================================
134 // ======== characters and strings ========
135 // ========================================
136 
escape2char(c: char) -> CChar137 fn escape2char(c: char) -> CChar {
138 	CChar::Char(match c {
139 		'a' => '\x07',
140 		'b' => '\x08',
141 		'f' => '\x0c',
142 		'n' => '\n',
143 		'r' => '\r',
144 		't' => '\t',
145 		'v' => '\x0b',
146 		_ => unreachable!("invalid escape {}",c)
147 	})
148 }
149 
c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar>150 fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> {
151 	str::from_utf8(&n).ok()
152 		.and_then(|i|u64::from_str_radix(i,radix).ok())
153 		.map(|i|match i {
154 			0 ... 0x7f => CChar::Char(i as u8 as char),
155 			_ => CChar::Raw(i),
156 		})
157 }
158 
c_unicode_escape(n: Vec<u8>) -> Option<CChar>159 fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {
160 	str::from_utf8(&n).ok()
161 		.and_then(|i|u32::from_str_radix(i,16).ok())
162 		.and_then(char::from_u32)
163 		.map(CChar::Char)
164 }
165 
// Parses one backslash escape sequence into a `CChar`.
//
// After the leading `\`, the alternatives below are tried in order.
named!(escaped_char<CChar>,
	preceded!(complete!(char!('\\')),alt_complete!(
		// `'`, `"`, `?` and `\` stand for themselves
		map!(one_of!(r#"'"?\"#),CChar::Char) |
		// single-letter control escapes, translated by `escape2char`
		map!(one_of!("abfnrtv"),escape2char) |
		// octal escape: one to three octal digits
		map_opt!(many_m_n!(1,3,octal),|v|c_raw_escape(v,8)) |
		// hexadecimal escape: `x` followed by one or more hex digits
		map_opt!(preceded!(char!('x'),many1!(hexadecimal)),|v|c_raw_escape(v,16)) |
		// unicode escape: `u` followed by exactly four hex digits
		map_opt!(preceded!(char!('u'),many_m_n!(4,4,hexadecimal)),c_unicode_escape) |
		// unicode escape: `U` followed by exactly eight hex digits
		map_opt!(preceded!(char!('U'),many_m_n!(8,8,hexadecimal)),c_unicode_escape)
	))
);
176 
// Recognizes the encoding prefix of a character or string literal: `u8`,
// `u`, `U` or `L`. `u8` is listed before `u`, so the longer prefix is tried
// first. The users in this file (`c_char`, `c_string`) discard the prefix.
named!(c_width_prefix,
	alt!(
		tag!("u8") |
		tag!("u") |
		tag!("U") |
		tag!("L")
	)
);
185 
// Parses a character literal: an optional width prefix, then exactly one
// escape sequence or one literal byte, enclosed in single quotes.
named!(c_char<CChar>,
	delimited!(
		// opening: optional prefix (discarded) followed by `'`
		terminated!(opt!(c_width_prefix),char!('\'')),
		// Any byte except the backslash (92) may appear literally and is
		// converted with `CChar::from`; a backslash has to start an escape.
		alt!( escaped_char | map!(byte!(0 ... 91 /* \=92 */ | 93 ... 255),CChar::from) ),
		char!('\'')
	)
);
193 
// Parses a string literal into its bytes: an optional width prefix, then any
// number of escape sequences and literal runs, enclosed in double quotes.
named!(c_string<Vec<u8> >,
	delimited!(
		// opening: `"` with or without a width prefix in front of it
		alt!( preceded!(c_width_prefix,char!('"')) | char!('"') ),
		fold_many0!(
			// Each piece becomes a `Vec<u8>`: an escape through `CChar`'s
			// conversion to bytes, a literal run (bytes other than backslash
			// and double quote) by copying the slice.
			alt!(map!(escaped_char, |c:CChar| c.into()) | map!(is_not!([b'\\', b'"']), |c: &[u8]| c.into())),
			Vec::new(),
			// append every piece to the accumulated buffer
			|mut v: Vec<u8>, res:Vec<u8>| { v.extend_from_slice(&res); v }
		),
		char!('"')
	)
);
205 
206 // ================================
207 // ======== parse integers ========
208 // ================================
209 
/// Reads the ASCII digits in `n` as an unsigned integer in base `radix`.
///
/// Returns `None` if `n` is not valid UTF-8, is not a number in that radix,
/// or does not fit into a `u64`.
fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
	match str::from_utf8(&n) {
		Ok(digits) => u64::from_str_radix(digits, radix).ok(),
		Err(_) => None,
	}
}
214 
take_ul(input: &[u8]) -> IResult<&[u8], &[u8]>215 fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> {
216 	use ::nom_crate::InputTakeAtPosition;
217 
218 	let r = input.split_at_position(|c| c != b'u' && c != b'U' && c != b'l' && c != b'L');
219 	match r {
220 		Err(Err::Incomplete(_)) => Ok((&input[input.len()..], input)),
221 		res => res,
222 	}
223 }
224 
// Parses an integer literal into `i64`.
//
// The digits are converted by `c_int_radix`; a `None` from it (e.g. a value
// that does not fit into `u64`) makes that alternative fail. An optional run
// of `u`/`U`/`l`/`L` suffix characters is consumed by `take_ul` and
// discarded. The resulting `u64` is converted with `as i64`, so values above
// `i64::MAX` are reinterpreted bit-for-bit.
named!(c_int<i64>,
	map!(terminated!(alt_complete!(
		// hexadecimal: `0x` / `0X` prefix
		map_opt!(preceded!(tag!("0x"),many1!(complete!(hexadecimal))),|v|c_int_radix(v,16)) |
		map_opt!(preceded!(tag!("0X"),many1!(complete!(hexadecimal))),|v|c_int_radix(v,16)) |
		// binary: `0b` / `0B` prefix
		map_opt!(preceded!(tag!("0b"),many1!(complete!(binary))),|v|c_int_radix(v,2)) |
		map_opt!(preceded!(tag!("0B"),many1!(complete!(binary))),|v|c_int_radix(v,2)) |
		// octal: `0` followed by at least one octal digit
		map_opt!(preceded!(char!('0'),many1!(complete!(octal))),|v|c_int_radix(v,8)) |
		// decimal; a lone `0` also ends up here because the octal
		// alternative requires a digit after the leading `0`
		map_opt!(many1!(complete!(decimal)),|v|c_int_radix(v,10)) |
		// never matches; see `force_type!`
		force_type!(IResult<_,_,u32>)
	),opt!(take_ul)),|i|i as i64)
);
236 
237 // ==============================
238 // ======== parse floats ========
239 // ==============================
240 
// Width suffix of a float literal: one of `f`, `l`, `F`, `L`.
named!(float_width<u8>,complete!(byte!(b'f' | b'l' | b'F' | b'L')));
// Exponent part: `e` or `E`, an optional `-`/`+` sign, then one or more
// decimal digits. Yields the sign byte (if any) and the digit bytes.
named!(float_exp<(Option<u8>,Vec<u8>)>,preceded!(byte!(b'e'|b'E'),pair!(opt!(byte!(b'-'|b'+')),many1!(complete!(decimal)))));
243 
244 named!(c_float<f64>,
245 	map_opt!(alt!(
246 		terminated!(recognize!(tuple!(many1!(complete!(decimal)),byte!(b'.'),many0!(complete!(decimal)))),opt!(float_width)) |
247 		terminated!(recognize!(tuple!(many0!(complete!(decimal)),byte!(b'.'),many1!(complete!(decimal)))),opt!(float_width)) |
248 		terminated!(recognize!(tuple!(many0!(complete!(decimal)),opt!(byte!(b'.')),many1!(complete!(decimal)),float_exp)),opt!(float_width)) |
249 		terminated!(recognize!(tuple!(many1!(complete!(decimal)),opt!(byte!(b'.')),many0!(complete!(decimal)),float_exp)),opt!(float_width)) |
250 		terminated!(recognize!(many1!(complete!(decimal))),float_width)
251 	),|v|str::from_utf8(v).ok().and_then(|i|f64::from_str(i).ok()))
252 );
253 
254 // ================================
255 // ======== main interface ========
256 // ================================
257 
// Parses exactly one literal, trying in order: character, integer, float,
// string. Every alternative is wrapped in `full!`, so it only counts if it
// consumes the entire input. The value is wrapped in the matching
// `EvalResult` variant (integers as `Wrapping<i64>`), and `fix_error!`
// changes the error type of the result to the crate's `::Error`.
named!(one_literal<&[u8],EvalResult,::Error>,
	fix_error!(::Error,alt_complete!(
		map!(full!(c_char),EvalResult::Char) |
		map!(full!(c_int),|i|EvalResult::Int(::std::num::Wrapping(i))) |
		map!(full!(c_float),EvalResult::Float) |
		map!(full!(c_string),EvalResult::Str)
	))
);
266 
267 /// Parse a C literal.
268 ///
269 /// The input must contain exactly the representation of a single literal
270 /// token, and in particular no whitespace or sign prefixes.
parse(input: &[u8]) -> IResult<&[u8],EvalResult,::Error>271 pub fn parse(input: &[u8]) -> IResult<&[u8],EvalResult,::Error> {
272 	::assert_full_parse(one_literal(input))
273 }
274