1 use std::str;
2 use std::fmt;
3 use std::fmt::Write;
4 use std::iter::Iterator;
5 use std::string::String;
6 
7 use types::*;
8 
9 #[derive(Debug, PartialEq)]
10 pub struct Formatter<'a, 'b> {
11     pub key: &'a str,
12     fill: char,
13     align: Alignment, // default Right
14     sign: Sign,
15     alternate: bool,
16     width: Option<usize>,
17     thousands: bool,
18     precision: Option<usize>,
19     ty: Option<char>,
20     buff: &'b mut String,
21     pattern: &'a str,
22 }
23 
/// Returns `true` when `c` is one of the alignment characters
/// `=`, `<`, `^` or `>`.
fn is_alignment_token(c: char) -> bool {
    matches!(c, '=' | '<' | '^' | '>')
}
30 
/// Returns `true` when `c` is one of the sign characters
/// space, `-` or `+`.
fn is_sign_element(c: char) -> bool {
    matches!(c, ' ' | '-' | '+')
}
37 
/// Returns `true` when `c` is an accepted presentation type:
/// `b`, `o`, `x`, `X`, `e`, `E`, `f`, `F`, `%`, `s` or `?`.
fn is_type_element(c: char) -> bool {
    matches!(
        c,
        'b' | 'o' | 'x' | 'X' | 'e' | 'E' | 'f' | 'F' | '%' | 's' | '?'
    )
}
54 
/// Read a run of ASCII decimal digits from `s`, starting at byte offset `pos`.
///
/// Returns `(consumed, value)`:
///
/// * `consumed` is the number of digit bytes found at `pos` (0 when the
///   byte at `pos` is not a digit or `pos` is at the end of `s`);
/// * `value` is the parsed number, or `None` when there were no digits or
///   the digits do not fit in an `i64`.
///
/// # Panics
///
/// Panics if `pos > s.len()`.
fn get_integer(s: &[u8], pos: usize) -> (usize, Option<i64>) {
    let rest = &s[pos..];
    let consumed = rest.iter().take_while(|b| b.is_ascii_digit()).count();
    if consumed == 0 {
        return (0, None);
    }
    // The slice holds only ASCII digits, so the UTF-8 check always succeeds;
    // using the checked conversion keeps this function free of `unsafe`.
    // A `None` here therefore means the number overflowed `i64`.
    let value = str::from_utf8(&rest[..consumed])
        .ok()
        .and_then(|digits| digits.parse::<i64>().ok());
    (consumed, value)
}
81 
82 
/// The format struct as it is defined in the python source.
///
/// Raw result of parsing a format spec; "not given" is encoded with
/// sentinel values (`'\0'` for characters, `-1` for numbers).
#[derive(Debug)]
struct FmtPy {
    /// Padding character.
    pub fill: char,
    /// Alignment character: one of `<`, `^`, `>` or `=`.
    pub align: char,
    /// `true` when the `#` flag was present.
    pub alternate: bool,
    /// Sign character (space, `-` or `+`), or `'\0'` when not given.
    pub sign: char,
    /// Field width, or `-1` when not given.
    pub width: i64,
    /// `true` when the `,` flag was present.
    pub thousands: bool,
    /// Precision, or `-1` when not given.
    pub precision: i64,
    /// Presentation type character, or `'\0'` when not given.
    pub ty: char,
}
95 
parse_like_python(rest: &str) -> Result<FmtPy>96 fn parse_like_python(rest: &str) -> Result<FmtPy> {
97     // The rest of this was pretty much strait up copied from python's format parser
98     // All credit goes to python source file: formatter_unicode.c
99     //
100 
101     let mut format = FmtPy {
102         fill: ' ',
103         align: '>',
104         alternate: false,
105         sign: '\0',
106         width: -1,
107         thousands: false,
108         precision: -1,
109         ty: '\0',
110     };
111     let mut chars = rest.chars();
112     let fake_fill = match chars.next() {
113         Some(c) => c,
114         None => return Ok(format),
115     };
116     // from now on all format characters MUST be valid
117     // ASCII characters (fill and identifier were the
118     // only ones that weren't.
119     // Therefore we can use bytes for the rest
120     let rest = rest.as_bytes();
121     let mut align_specified = false;
122     let mut fill_specified = false;
123 
124     let end: usize = rest.len();
125     let mut pos: usize = 0;
126 
127     // If the second char is an alignment token,
128     // then fake_fill as fill
129     if end - pos >= 1 + fake_fill.len_utf8() &&
130        is_alignment_token(rest[pos + fake_fill.len_utf8()] as char) {
131         format.align = rest[pos + fake_fill.len_utf8()] as char;
132         format.fill = fake_fill;
133         fill_specified = true;
134         align_specified = true;
135         pos += 1 + fake_fill.len_utf8();
136     } else if end - pos >= 1 && is_alignment_token(fake_fill) {
137         format.align = fake_fill;
138         pos += fake_fill.len_utf8();
139     }
140 
141     // Parse the various sign options
142     if end - pos >= 1 && is_sign_element(rest[pos] as char) {
143         format.sign = rest[pos] as char;
144         pos += 1;
145     }
146 
147     // If the next character is #, we're in alternate mode.  This only
148     // applies to integers.
149     if end - pos >= 1 && rest[pos] as char == '#' {
150         format.alternate = true;
151         pos += 1;
152     }
153 
154     // The special case for 0-padding (backwards compat)
155     if !fill_specified && end - pos >= 1 && rest[pos] == '0' as u8 {
156         format.fill = '0';
157         if !align_specified {
158             format.align = '=';
159         }
160         pos += 1;
161     }
162 
163     // check to make sure that val is good
164     let (consumed, val) = get_integer(rest, pos);
165     pos += consumed;
166     if consumed != 0 {
167         match val {
168             None => return Err(FmtError::Invalid("overflow error when parsing width".to_string())),
169             Some(v) => {
170                 format.width = v;
171             }
172         }
173     }
174 
175     // Comma signifies add thousands separators
176     if end - pos > 0 && rest[pos] as char == ',' {
177         format.thousands = true;
178         pos += 1;
179     }
180 
181     // Parse field precision
182     if end - pos > 0 && rest[pos] as char == '.' {
183         pos += 1;
184 
185         let (consumed, val) = get_integer(rest, pos);
186         if consumed != 0 {
187             match val {
188                 None => {
189                     return Err(FmtError::Invalid("overflow error when parsing precision"
190                                                      .to_string()))
191                 }
192                 Some(v) => {
193                     format.precision = v;
194                 }
195             }
196         } else {
197             // Not having a precision after a dot is an error.
198             if consumed == 0 {
199                 return Err(FmtError::Invalid("Format specifier missing precision".to_string()));
200             }
201         }
202         pos += consumed;
203 
204     }
205 
206     // Finally, parse the type field.
207     if end - pos > 1 {
208         // More than one char remain, invalid format specifier.
209         return Err(FmtError::Invalid("Invalid format specifier".to_string()));
210     }
211 
212     if end - pos == 1 {
213         format.ty = rest[pos] as char;
214         if !is_type_element(format.ty) {
215             let mut msg = String::new();
216             write!(msg, "Invalid type specifier: {:?}", format.ty).unwrap();
217             return Err(FmtError::TypeError(msg));
218         }
219         // pos+=1;
220     }
221 
222     // Do as much validating as we can, just by looking at the format
223     // specifier.  Do not take into account what type of formatting
224     // we're doing (int, float, string).
225     if format.thousands {
226         match format.ty {
227             'd' |
228             'e' |
229             'f' |
230             'g' |
231             'E' |
232             'G' |
233             '%' |
234             'F' |
235             '\0' => {} /* These are allowed. See PEP 378.*/
236 
237             _ => {
238                 let mut msg = String::new();
239                 write!(msg, "Invalid comma type: {}", format.ty).unwrap();
240                 return Err(FmtError::Invalid(msg));
241             }
242         }
243     }
244     Ok(format)
245 }
246 
247 impl<'a, 'b> Formatter<'a, 'b> {
248     /// create Formatter from format string
from_str(s: &'a str, buff: &'b mut String) -> Result<Formatter<'a, 'b>>249     pub fn from_str(s: &'a str, buff: &'b mut String) -> Result<Formatter<'a, 'b>> {
250         let mut found_colon = false;
251         let mut chars = s.chars();
252         let mut c = match chars.next() {
253             Some(':') | None => {
254                 return Err(FmtError::Invalid("must specify identifier".to_string()))
255             }
256             Some(c) => c,
257         };
258         let mut consumed = 0;
259         // find the identifier
260         loop {
261             consumed += c.len_utf8();
262             if c == ':' {
263                 found_colon = true;
264                 break;
265             }
266             c = match chars.next() {
267                 Some(c) => c,
268                 None => {
269                     break;
270                 }
271             };
272         }
273         let (identifier, rest) = s.split_at(consumed);
274         let identifier = if found_colon {
275             let (i, _) = identifier.split_at(identifier.len() - 1); // get rid of ':'
276             i
277         } else {
278             identifier
279         };
280 
281         let format = try!(parse_like_python(rest));
282 
283         Ok(Formatter {
284             key: identifier,
285             fill: format.fill,
286             align: match format.align {
287                 '<' => Alignment::Left,
288                 '^' => Alignment::Center,
289                 '>' => Alignment::Right,
290                 '=' => Alignment::Equal,
291                 _ => unreachable!(),
292             },
293             sign: match format.sign {
294                 '\0' => Sign::Unspecified,
295                 '+' => Sign::Plus,
296                 '-' => Sign::Minus,
297                 ' ' => Sign::Space,
298                 _ => unreachable!(),
299             },
300             alternate: format.alternate,
301             width: match format.width {
302                 -1 => None,
303                 _ => Some(format.width as usize),
304             },
305             thousands: format.thousands,
306             precision: match format.precision {
307                 -1 => None,
308                 _ => Some(format.precision as usize),
309             },
310             ty: match format.ty {
311                 '\0' => None,
312                 _ => Some(format.ty),
313             },
314             buff: buff,
315             pattern: s,
316         })
317     }
318 
319     /// call this to re-write the original format string verbatum
320     /// back to the output
skip(mut self) -> Result<()>321     pub fn skip(mut self) -> Result<()> {
322         self.buff.push('{');
323         self.write_str(self.pattern).unwrap();
324         self.buff.push('}');
325         Ok(())
326     }
327 
328 
329     /// fill getter
fill(&self) -> char330     pub fn fill(&self) -> char {
331         self.fill
332     }
333 
334     /// align getter
align(&self) -> Alignment335     pub fn align(&self) -> Alignment {
336         self.align.clone()
337     }
338 
339     /// width getter
width(&self) -> Option<usize>340     pub fn width(&self) -> Option<usize> {
341         self.width
342     }
343 
344     /// thousands getter
thousands(&self) -> bool345     pub fn thousands(&self) -> bool {
346         self.thousands
347     }
348 
349     /// precision getter
precision(&self) -> Option<usize>350     pub fn precision(&self) -> Option<usize> {
351         self.precision
352     }
353 
354     /// set precision to None, used for formatting int, float, etc
set_precision(&mut self, precision: Option<usize>)355     pub fn set_precision(&mut self, precision: Option<usize>) {
356         self.precision = precision;
357     }
358 
359     /// sign getter
sign(&self) -> Sign360     pub fn sign(&self) -> Sign {
361         self.sign.clone()
362     }
363 
364     /// sign plus getter
365     /// here because it is in fmt::Formatter
sign_plus(&self) -> bool366     pub fn sign_plus(&self) -> bool {
367         self.sign == Sign::Plus
368     }
369 
370     /// sign minus getter
371     /// here because it is in fmt::Formatter
sign_minus(&self) -> bool372     pub fn sign_minus(&self) -> bool {
373         self.sign == Sign::Minus
374     }
375 
376     /// alternate getter
alternate(&self) -> bool377     pub fn alternate(&self) -> bool {
378         self.alternate
379     }
380 
381     // sign_aware_zero_pad // Not supported
382 
383     /// type getter
ty(&self) -> Option<char>384     pub fn ty(&self) -> Option<char> {
385         self.ty
386     }
387 
388     /// UNSTABLE: in the future, this may return true if all validty
389     ///   checks for a float return true
390     /// return true if ty is valid for formatting integers
is_int_type(&self) -> bool391     pub fn is_int_type(&self) -> bool {
392         match self.ty {
393             None => true,
394             Some(c) => match c {
395                 'b' | 'o' | 'x' | 'X' => true,
396                 _ => false,
397             }
398         }
399     }
400 
401     /// UNSTABLE: in the future, this may return true if all validty
402     ///   checks for a float return true
403     /// return true if ty is valid for formatting floats
is_float_type(&self) -> bool404     pub fn is_float_type(&self) -> bool {
405         match self.ty {
406             None => true,
407             Some(c) => match c {
408                 'f' | 'e' | 'E' => true,
409                 _ => false,
410             }
411         }
412     }
413 }
414 
415 
416 impl<'a, 'b> fmt::Write for Formatter<'a, 'b> {
write_str(&mut self, s: &str) -> fmt::Result417     fn write_str(&mut self, s: &str) -> fmt::Result {
418         self.buff.write_str(s)
419     }
420 }
421