1 // (C) Copyright 2016 Jethro G. Beekman
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 //! Parsing C literals from byte slices.
9 //!
10 //! This will parse a representation of a C literal into a Rust type.
11 //!
12 //! # characters
13 //! Character literals are stored into the `CChar` type, which can hold values
14 //! that are not valid Unicode code points. ASCII characters are represented as
//! `char`, literal bytes with the high bit set are converted into the raw
16 //! representation. Escape sequences are supported. If hex and octal escapes
17 //! map to an ASCII character, that is used, otherwise, the raw encoding is
18 //! used, including for values over 255. Unicode escapes are checked for
19 //! validity and mapped to `char`. Character sequences are not supported. Width
20 //! prefixes are ignored.
21 //!
22 //! # strings
23 //! Strings are interpreted as byte vectors. Escape sequences are supported. If
24 //! hex and octal escapes map onto multi-byte characters, they are truncated to
25 //! one 8-bit character. Unicode escapes are converted into their UTF-8
26 //! encoding. Width prefixes are ignored.
27 //!
28 //! # integers
29 //! Integers are read into `i64`. Binary, octal, decimal and hexadecimal are
30 //! all supported. If the literal value is between `i64::MAX` and `u64::MAX`,
31 //! it is bit-cast to `i64`. Values over `u64::MAX` cannot be parsed. Width and
32 //! sign suffixes are ignored. Sign prefixes are not supported.
33 //!
34 //! # real numbers
35 //! Reals are read into `f64`. Width suffixes are ignored. Sign prefixes are
36 //! not supported in the significand. Hexadecimal floating points are not
37 //! supported.
38 
39 use std::char;
40 use std::str::{self,FromStr};
41 
42 use nom_crate::*;
43 
44 use expr::EvalResult;
45 
/// A single C character value.
///
/// C character literals can denote values that are not valid Unicode scalar
/// values, so this type keeps those apart from ordinary characters.
#[derive(Clone,Copy,Debug,Eq,PartialEq)]
pub enum CChar {
	/// A value that fits in a Rust `char`.
	Char(char),
	/// Everything else, stored as its raw numeric value (8-bit characters,
	/// unicode surrogates, etc.).
	Raw(u64),
}
54 
55 impl From<u8> for CChar {
from(i: u8) -> CChar56 	fn from(i: u8) -> CChar {
57 		match i {
58 			0 ... 0x7f => CChar::Char(i as u8 as char),
59 			_ => CChar::Raw(i as u64),
60 		}
61 	}
62 }
63 
64 // A non-allocating version of this would be nice...
65 impl Into<Vec<u8>> for CChar {
into(self) -> Vec<u8>66 	fn into(self) -> Vec<u8> {
67 		match self {
68 			CChar::Char(c) => {
69 				let mut s=String::with_capacity(4);
70 				s.extend(&[c]);
71 				s.into_bytes()
72 			}
73 			CChar::Raw(i) => {
74 				let mut v=Vec::with_capacity(1);
75 				v.push(i as u8);
76 				v
77 			}
78 		}
79 	}
80 }
81 
82 // ====================================================
83 // ======== macros that shouldn't be necessary ========
84 // ====================================================
85 
/// Returns the leading part of `full` that comes before `suffix`.
///
/// `suffix` is expected to be a sub-slice of `full`; its position is found
/// by comparing the start addresses of the two slices.
///
/// For zero-sized `T` addresses carry no position information, so `suffix`
/// is assumed to be a trailing part of `full` and the prefix length is
/// derived from the two lengths instead.
///
/// # Panics
/// Panics if `suffix` does not start inside (or one past the end of) `full`,
/// or, for zero-sized `T`, if `suffix` is longer than `full`.
fn split_off_prefix<'a,T>(full: &'a [T], suffix: &'a [T]) -> &'a [T] {
	let n=::std::mem::size_of::<T>();
	if n==0 {
		// Dividing by the element size below would panic with a
		// divide-by-zero, so handle zero-sized elements by length.
		assert!(suffix.len()<=full.len());
		return &full[..full.len()-suffix.len()];
	}
	let start=full.as_ptr() as usize;
	let end=start+(full.len()*n);
	let cur=suffix.as_ptr() as usize;
	assert!(start<=cur && cur<=end);
	// Byte distance between the two starts, converted to an element count.
	&full[..(cur-start)/n]
}
94 
// There is a HORRIBLE BUG in nom's recognize!
// https://github.com/Geal/nom/issues/278
//
// Replacement for nom's `recognize!`: runs the given sub-parser and, when it
// succeeds, discards its output and instead yields the part of the input it
// consumed. That part is computed by `split_off_prefix` from the original
// input and the remaining input. `Error` and `Incomplete` results are passed
// through unchanged.
#[macro_export]
macro_rules! my_recognize (
  // Form 1: the sub-parser is itself a macro invocation, `submac!(args...)`.
  ($i:expr, $submac:ident!( $($args:tt)* )) => (
    {
      match $submac!($i, $($args)*) {
        // `i` is the input left over after the sub-parser ran.
        IResult::Done(i,_)     => IResult::Done(i, split_off_prefix($i,i)),
        IResult::Error(e)      => IResult::Error(e),
        IResult::Incomplete(i) => IResult::Incomplete(i)
      }
    }
  );
  // Form 2: the sub-parser is a plain expression; wrap it in `call!` and
  // defer to form 1.
  ($i:expr, $f:expr) => (
    my_recognize!($i, call!($f))
  );
);
112 
113 
// A parser that never matches: it always evaluates to an `IResult::Error`
// (kind `ErrorKind::Fix`, positioned at the input) whose three type
// parameters are spelled out by the caller as `IResult<I,O,E>`.
// NOTE(review): presumably used as a final `alt!` branch purely to pin down
// type parameters that inference cannot determine on its own -- confirm.
macro_rules! force_type (
	($input:expr,IResult<$i:ty,$o:ty,$e:ty>) => (IResult::Error::<$i,$o,$e>(Err::Position(ErrorKind::Fix,$input)))
);
117 
118 
119 // =================================
120 // ======== matching digits ========
121 // =================================
122 
// Matches exactly one input byte against one or more `|`-separated patterns.
//
// * first byte matches a pattern -> `Done(rest, byte)`
// * first byte matches none      -> `Error` of kind `OneOf` at the input
// * input is empty               -> `Incomplete(Needed::Size(1))`
macro_rules! byte (
	($i:expr, $($p: pat)|* ) => ({
		match $i.split_first() {
			// One `Some((&c @ pattern, rest))` arm alternative per pattern.
			$(Some((&c @ $p,rest)))|* => IResult::Done::<&[_],u8,u32>(rest,c),
			Some(_) => IResult::Error(Err::Position(ErrorKind::OneOf,$i)),
			None => IResult::Incomplete(Needed::Size(1)),
		}
	})
);
132 
// Single-digit parsers, one per radix. Each consumes one byte and yields it
// unchanged (the ASCII digit itself, not its numeric value).

// One binary digit: `0` or `1`.
named!(binary<u8>,byte!(b'0' ... b'1'));
// One octal digit: `0` through `7`.
named!(octal<u8>,byte!(b'0' ... b'7'));
// One decimal digit: `0` through `9`.
named!(decimal<u8>,byte!(b'0' ... b'9'));
// One hexadecimal digit, upper or lower case.
named!(hexadecimal<u8>,byte!(b'0' ... b'9' | b'a' ... b'f' | b'A' ... b'F'));
137 
138 
139 // ========================================
140 // ======== characters and strings ========
141 // ========================================
142 
escape2char(c: char) -> CChar143 fn escape2char(c: char) -> CChar {
144 	CChar::Char(match c {
145 		'a' => '\x07',
146 		'b' => '\x08',
147 		'f' => '\x0c',
148 		'n' => '\n',
149 		'r' => '\r',
150 		't' => '\t',
151 		'v' => '\x0b',
152 		_ => unreachable!("invalid escape {}",c)
153 	})
154 }
155 
c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar>156 fn c_raw_escape(n: Vec<u8>, radix: u32) -> Option<CChar> {
157 	str::from_utf8(&n).ok()
158 		.and_then(|i|u64::from_str_radix(i,radix).ok())
159 		.map(|i|match i {
160 			0 ... 0x7f => CChar::Char(i as u8 as char),
161 			_ => CChar::Raw(i),
162 		})
163 }
164 
c_unicode_escape(n: Vec<u8>) -> Option<CChar>165 fn c_unicode_escape(n: Vec<u8>) -> Option<CChar> {
166 	str::from_utf8(&n).ok()
167 		.and_then(|i|u32::from_str_radix(i,16).ok())
168 		.and_then(char::from_u32)
169 		.map(CChar::Char)
170 }
171 
// Parses one escape sequence: a backslash followed by one of the forms
// below. The alternatives are listed in the order they are tried.
named!(escaped_char<CChar>,
	preceded!(char!('\\'),alt!(
		// `\'`, `\"`, `\?`, `\\`: the character stands for itself.
		map!(one_of!(br#"'"?\"#),CChar::Char) |
		// Single-letter control escapes, translated by `escape2char`.
		map!(one_of!(b"abfnrtv"),escape2char) |
		// Octal escape: one to three octal digits.
		map_opt!(many_m_n!(1,3,octal),|v|c_raw_escape(v,8)) |
		// Hex escape: `x` followed by one or more hex digits.
		map_opt!(preceded!(char!('x'),many1!(hexadecimal)),|v|c_raw_escape(v,16)) |
		// Unicode escape: `u` followed by exactly four hex digits.
		map_opt!(preceded!(char!('u'),many_m_n!(4,4,hexadecimal)),c_unicode_escape) |
		// Unicode escape: `U` followed by exactly eight hex digits.
		map_opt!(preceded!(char!('U'),many_m_n!(8,8,hexadecimal)),c_unicode_escape)
	))
);
182 
// Matches one of the encoding/width prefixes that may precede a character or
// string literal: `u8`, `u`, `U` or `L`. `u8` is listed before `u` so that
// the longer prefix is tried first.
named!(c_width_prefix,
	alt!(
		tag!("u8") |
		tag!("u") |
		tag!("U") |
		tag!("L")
	)
);
191 
// Parses a character literal: an optional width prefix, an opening `'`,
// exactly one character, and a closing `'`. The prefix is matched and then
// discarded.
named!(c_char<CChar>,
	delimited!(
		terminated!(opt!(c_width_prefix),char!('\'')),
		// Either an escape sequence, or any single byte other than a
		// backslash, converted through `CChar::from(u8)`.
		alt!( escaped_char | map!(byte!(0 ... 91 /* \=92 */ | 93 ... 255),CChar::from) ),
		char!('\'')
	)
);
199 
200 named!(c_string<Vec<u8> >,
201 	delimited!(
202 		alt!( preceded!(c_width_prefix,char!('"')) | char!('"') ),
203 		chain!(
204 			mut vec: value!(vec![]) ~
205 			many0!(alt!(
206 				map!(tap!(c: escaped_char => { let v: Vec<u8>=c.into(); vec.extend_from_slice(&v) } ),|_|()) |
207 				map!(tap!(s: is_not!(b"\"") => vec.extend_from_slice(s) ),|_|())
208 			)),
209 			||{return vec}
210 		),
211 		char!('"')
212 	)
213 );
214 
215 // ================================
216 // ======== parse integers ========
217 // ================================
218 
/// Parses the ASCII digits in `n` as an unsigned integer in base `radix`.
///
/// Returns `None` when `n` is not valid UTF-8 or is not a number in that
/// radix that fits into a `u64` (this includes the empty input).
fn c_int_radix(n: Vec<u8>, radix: u32) -> Option<u64> {
	match str::from_utf8(&n) {
		Ok(digits) => u64::from_str_radix(digits,radix).ok(),
		Err(_) => None,
	}
}
223 
// Parses an integer literal into an `i64`. The alternatives are listed in
// the order they are tried; the digits are collected and converted by
// `c_int_radix`, which yields a `u64`.
named!(c_int<i64>,
	map!(terminated!(alt_complete!(
		// Hexadecimal: `0x` (lower case only) followed by hex digits.
		map_opt!(preceded!(tag!("0x"),many1!(hexadecimal)),|v|c_int_radix(v,16)) |
		// Binary: `0b` (lower case only) followed by binary digits.
		map_opt!(preceded!(tag!("0b"),many1!(binary)),|v|c_int_radix(v,2)) |
		// Octal: a leading `0` followed by at least one octal digit.
		map_opt!(preceded!(char!('0'),many1!(octal)),|v|c_int_radix(v,8)) |
		// Decimal: one or more decimal digits.
		map_opt!(many1!(decimal),|v|c_int_radix(v,10)) |
		// Always-failing branch that spells out the error type as `u32`.
		force_type!(IResult<_,_,u32>)
	// Suffix characters (`u`, `l`, `U`, `L`) are matched and dropped; the
	// `u64` is then cast to `i64` with `as`, so values above `i64::MAX` are
	// reinterpreted rather than rejected.
	),is_a!("ulUL")),|i|i as i64)
);
233 
234 // ==============================
235 // ======== parse floats ========
236 // ==============================
237 
// A floating-point width suffix: one of `f`, `l`, `F`, `L`. Wrapped in
// `complete!`, so running out of input counts as a failure to match rather
// than as "need more data".
named!(float_width<u8>,complete!(byte!(b'f' | b'l' | b'F' | b'L')));
// An exponent: `e` or `E`, an optional sign, and one or more decimal digits.
// Yields the optional sign byte and the digit bytes.
named!(float_exp<(Option<u8>,Vec<u8>)>,preceded!(byte!(b'e'|b'E'),pair!(opt!(byte!(b'-'|b'+')),many1!(decimal))));
240 
// Parses a floating-point literal into an `f64`.
//
// Each alternative uses `my_recognize!` to capture the text of the number
// itself; the width suffix sits outside the captured text and is dropped.
// The captured text is then converted with `str::from_utf8` followed by
// `f64::from_str`; if either step fails the parser fails.
named!(c_float<f64>,
	map_opt!(alt!(
		// digits `.` [digits], optional width suffix -- e.g. `1.` or `1.5`
		terminated!(my_recognize!(tuple!(many1!(decimal),byte!(b'.'),many0!(decimal))),opt!(float_width)) |
		// [digits] `.` digits, optional width suffix -- e.g. `.5`
		terminated!(my_recognize!(tuple!(many0!(decimal),byte!(b'.'),many1!(decimal))),opt!(float_width)) |
		// [digits] [`.`] digits exponent, optional width suffix
		terminated!(my_recognize!(tuple!(many0!(decimal),opt!(byte!(b'.')),many1!(decimal),float_exp)),opt!(float_width)) |
		// digits [`.`] [digits] exponent, optional width suffix
		terminated!(my_recognize!(tuple!(many1!(decimal),opt!(byte!(b'.')),many0!(decimal),float_exp)),opt!(float_width)) |
		// digits followed by a mandatory width suffix -- e.g. `1f`
		terminated!(my_recognize!(many1!(decimal)),float_width)
	),|v|str::from_utf8(v).ok().and_then(|i|f64::from_str(i).ok()))
);
250 
251 // ================================
252 // ======== main interface ========
253 // ================================
254 
// Parses any one supported literal and wraps it in the matching `EvalResult`
// variant. The alternatives are tried in the order listed: character,
// integer, float, string. Integers are wrapped in `std::num::Wrapping`.
// `fix_error!` is applied so that the parser's error type is `::Error`, as
// declared in the signature below.
named!(one_literal<&[u8],EvalResult,::Error>,
	fix_error!(::Error,alt_complete!(
		map!(c_char,EvalResult::Char) |
		map!(c_int,|i|EvalResult::Int(::std::num::Wrapping(i))) |
		map!(c_float,EvalResult::Float) |
		map!(c_string,EvalResult::Str)
	))
);
263 
264 /// Parse a C literal.
265 ///
266 /// The input must contain exactly the representation of a single literal
267 /// token, and in particular no whitespace or sign prefixes.
parse(input: &[u8]) -> IResult<&[u8],EvalResult,::Error>268 pub fn parse(input: &[u8]) -> IResult<&[u8],EvalResult,::Error> {
269 	::assert_full_parse(one_literal(input))
270 }
271