1 use crate::error::CalcError;
2 use crate::error::CalcError::*;
3 use crate::value::{Integral, Value};
4 use decimal::d128;
5 use num::Num;
6 use std::fmt;
7 use std::iter::Peekable;
8 
9 /// Tokens used for parsing an arithmetic expression
10 #[derive(Debug, Clone, PartialEq)]
11 pub enum Token {
12     Ans,
13     Plus,
14     Minus,
15     Divide,
16     Multiply,
17     Exponent,
18     Square,
19     Cube,
20     BitWiseAnd,
21     BitWiseOr,
22     BitWiseXor,
23     BitWiseNot,
24     BitWiseRShift,
25     BitWiseLShift,
26     Modulo,
27     OpenParen,
28     CloseParen,
29     Dice,
30     Number(Value),
31     Atom(String),
32 }
33 
34 impl fmt::Display for Token {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result35     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
36         match *self {
37             Token::Ans => write!(f, "Ans"),
38             Token::Plus => write!(f, "Plus"),
39             Token::Minus => write!(f, "Minus"),
40             Token::Divide => write!(f, "Divide"),
41             Token::Multiply => write!(f, "Multiply"),
42             Token::Exponent => write!(f, "Exponent"),
43             Token::Square => write!(f, "Square"),
44             Token::Cube => write!(f, "Cube"),
45             Token::BitWiseAnd => write!(f, "And"),
46             Token::BitWiseOr => write!(f, "Or"),
47             Token::BitWiseXor => write!(f, "Xor"),
48             Token::BitWiseNot => write!(f, "Not"),
49             Token::BitWiseRShift => write!(f, "RShift"),
50             Token::BitWiseLShift => write!(f, "LShift"),
51             Token::Modulo => write!(f, "Modulo"),
52             Token::OpenParen => write!(f, "OpenParen"),
53             Token::CloseParen => write!(f, "CloseParen"),
54             Token::Dice => write!(f, "Dice"),
55             Token::Number(ref n) => write!(f, "'{}'", n),
56             Token::Atom(ref s) => write!(f, "'{}'", s),
57         }
58     }
59 }
60 
61 enum OperatorState {
62     PotentiallyIncomplete,
63     Complete,
64     NotAnOperator,
65 }
66 
67 trait IsOperator {
is_operator(self) -> bool68     fn is_operator(self) -> bool;
69 }
70 
71 impl IsOperator for char {
is_operator(self) -> bool72     fn is_operator(self) -> bool {
73         match self {
74             '+' | '-' | '/' | '^' | '²' | '³' | '&' | '|' | '~' | '>' | '%'
75             | '(' | ')' | '*' | '<' | 'd' => true,
76             _ => false,
77         }
78     }
79 }
80 
81 trait CheckOperator {
check_operator(self) -> OperatorState82     fn check_operator(self) -> OperatorState;
83 }
84 
85 impl CheckOperator for char {
check_operator(self) -> OperatorState86     fn check_operator(self) -> OperatorState {
87         match self {
88             '+' | '-' | '/' | '^' | '²' | '³' | '&' | '|' | '~' | '%' | '('
89             | ')' | 'd' => OperatorState::Complete,
90             '*' | '<' | '>' => OperatorState::PotentiallyIncomplete,
91             _ => OperatorState::NotAnOperator,
92         }
93     }
94 }
95 
96 trait OperatorMatch {
operator_type(self) -> Option<Token>97     fn operator_type(self) -> Option<Token>;
98 }
99 
100 impl OperatorMatch for [char; 2] {
operator_type(self) -> Option<Token>101     fn operator_type(self) -> Option<Token> {
102         if self == ['*', '*'] {
103             Some(Token::Exponent)
104         } else if self == ['<', '<'] {
105             Some(Token::BitWiseLShift)
106         } else if self == ['>', '>'] {
107             Some(Token::BitWiseRShift)
108         } else {
109             None
110         }
111     }
112 }
113 
114 impl OperatorMatch for char {
operator_type(self) -> Option<Token>115     fn operator_type(self) -> Option<Token> {
116         match self {
117             '+' => Some(Token::Plus),
118             '-' => Some(Token::Minus),
119             '/' => Some(Token::Divide),
120             '*' => Some(Token::Multiply),
121             '^' => Some(Token::BitWiseXor),
122             '²' => Some(Token::Square),
123             '³' => Some(Token::Cube),
124             '&' => Some(Token::BitWiseAnd),
125             '|' => Some(Token::BitWiseOr),
126             '~' => Some(Token::BitWiseNot),
127             '%' => Some(Token::Modulo),
128             '(' => Some(Token::OpenParen),
129             ')' => Some(Token::CloseParen),
130             'd' => Some(Token::Dice),
131             _ => None,
132         }
133     }
134 }
135 
136 /// Tokenizes a mathematical expression written written with the standard infix
137 /// notation.
138 ///
139 /// Returns a vector of `Token`s in the infix format, if the supplied
140 /// expression is valid. This
141 /// vector can then be pased into the `parse` function to be evaluated.
tokenize(input: &str) -> Result<Vec<Token>, CalcError>142 pub fn tokenize(input: &str) -> Result<Vec<Token>, CalcError> {
143     let mut tokens = Vec::with_capacity(input.len());
144     let mut chars = input.chars().peekable();
145 
146     while let Some(&c) = chars.peek() {
147         match c.check_operator() {
148             OperatorState::Complete => {
149                 tokens
150                     .push(c.operator_type().ok_or_else(|| InvalidOperator(c))?);
151                 chars.next();
152             }
153             OperatorState::PotentiallyIncomplete => {
154                 chars.next();
155                 match chars.peek() {
156                     Some(&next_char) if next_char.is_operator() => {
157                         tokens.push(
158                             [c, next_char]
159                                 .operator_type()
160                                 .ok_or_else(|| InvalidOperator(c))?,
161                         );
162                         chars.next();
163                     }
164                     _ => {
165                         tokens.push(
166                             c.operator_type()
167                                 .ok_or_else(|| InvalidOperator(c))?,
168                         );
169                     }
170                 }
171             }
172             OperatorState::NotAnOperator => {
173                 if c.is_whitespace() {
174                     chars.next();
175                 } else if c.is_alphabetic() {
176                     tokens.push(consume_ans_or_atom(&mut chars).into());
177                 } else if c.is_digit(16) || c == '.' {
178                     tokens.push(Token::Number(consume_number(&mut chars)?));
179                 } else {
180                     let token_string = consume_until_new_token(&mut chars);
181                     return Err(CalcError::UnrecognizedToken(token_string));
182                 }
183             }
184         }
185     }
186     Ok(tokens)
187 }
188 
189 /// Tokenizes a mathematical expression written with a polish (prefix) notation.
190 ///
191 /// Returns a vector of `Token`s in the infix format, if the supplied
192 /// expression is valid. This
193 /// vector can then be pased into the `parse` function to be evaluated.
tokenize_polish(input: &str) -> Result<Vec<Token>, CalcError>194 pub fn tokenize_polish(input: &str) -> Result<Vec<Token>, CalcError> {
195     // NOTE: This function isn't as efficient as it could be. For sake of
196     // compatibility with the
197     // existing infix parser, this function stores and re-arranges tokens in
198     // the infix format;
199     // adding extra parenthesis so that the order of operations aren't followed.
200 
201     // A temporary enum that is used for collecting polish values before they
202     // are converted into
203     // tokens for the infix format.
204     #[derive(Debug)]
205     enum PolishValue {
206         Ans,
207         Atom(String),
208         Number(Value),
209     }
210     impl From<PolishValue> for Token {
211         fn from(polish: PolishValue) -> Token {
212             match polish {
213                 PolishValue::Ans => Token::Ans,
214                 PolishValue::Atom(atom) => Token::Atom(atom),
215                 PolishValue::Number(val) => Token::Number(val),
216             }
217         }
218     }
219     impl From<Token> for PolishValue {
220         fn from(token: Token) -> PolishValue {
221             match token {
222                 Token::Ans => PolishValue::Ans,
223                 Token::Atom(atom) => PolishValue::Atom(atom),
224                 Token::Number(val) => PolishValue::Number(val),
225                 _ => unreachable!(),
226             }
227         }
228     }
229 
230     let mut chars = input.chars().peekable();
231     let mut operators: Vec<Token> = Vec::with_capacity(input.len() / 4);
232     let mut values: Vec<PolishValue> = Vec::with_capacity(input.len() / 3);
233     let mut tokens = Vec::with_capacity(input.len() / 2);
234     let mut parens = 0;
235 
236     'outer: loop {
237         // Collect the operators until a number is found.
238         while let Some(&c) = chars.peek() {
239             match c.check_operator() {
240                 OperatorState::Complete => {
241                     let token =
242                         c.operator_type().ok_or_else(|| InvalidOperator(c))?;
243                     if token != Token::OpenParen && token != Token::CloseParen {
244                         operators.push(token);
245                     }
246                     chars.next();
247                 }
248                 OperatorState::PotentiallyIncomplete => {
249                     chars.next();
250                     match chars.peek() {
251                         Some(&next_char) if next_char.is_operator() => {
252                             let token = [c, next_char]
253                                 .operator_type()
254                                 .ok_or_else(|| InvalidOperator(c))?;
255                             if token != Token::OpenParen
256                                 && token != Token::CloseParen
257                             {
258                                 operators.push(token);
259                             }
260                             chars.next();
261                         }
262                         _ => {
263                             let token = c
264                                 .operator_type()
265                                 .ok_or_else(|| InvalidOperator(c))?;
266                             if token != Token::OpenParen
267                                 && token != Token::CloseParen
268                             {
269                                 operators.push(token);
270                             }
271                         }
272                     }
273                 }
274                 OperatorState::NotAnOperator => {
275                     if c.is_whitespace() {
276                         chars.next();
277                     } else if c.is_alphabetic() {
278                         values.push(consume_ans_or_atom(&mut chars).into());
279                         break;
280                     } else if c.is_digit(16) || c == '.' {
281                         values.push(PolishValue::Number(consume_number(
282                             &mut chars,
283                         )?));
284                         break;
285                     } else {
286                         let token_string = consume_until_new_token(&mut chars);
287                         return Err(CalcError::UnrecognizedToken(token_string));
288                     }
289                 }
290             }
291         }
292 
293         // Then collect the atoms/numbers, resetting the looping if more
294         // operators are found.
295         while let Some(&c) = chars.peek() {
296             if c.is_alphabetic() {
297                 values.push(consume_ans_or_atom(&mut chars).into());
298             } else if c.is_digit(16) || c == '.' {
299                 values.push(PolishValue::Number(consume_number(&mut chars)?));
300             } else if c.is_whitespace() || c == ')' {
301                 let _ = chars.next();
302             } else if operators.len() != values.len() {
303                 // Either too many or too few values were supplied
304                 return Err(CalcError::UnexpectedEndOfInput);
305             } else {
306                 // This block executes when multiple sets of arithmetic
307                 // operations are entered.
308                 // Add parenthesis to work around the order of operations for
309                 // infix arithmetic.
310                 for _ in 1..operators.len() {
311                     tokens.push(Token::OpenParen);
312                 }
313 
314                 // Operators are processed in reverse, while numbers are
315                 // processed forwardly.
316                 let mut iterator = values
317                     .drain(..)
318                     .map(Token::from)
319                     .zip(operators.drain(..).rev());
320 
321                 // The first iteration will not include any closing parenthesis.
322                 if let Some((value, operator)) = iterator.next() {
323                     tokens.push(value);
324                     tokens.push(operator);
325                 }
326 
327                 // Each following iteration will append the second number in
328                 // the operation,
329                 // followed by a closing parenthesis and the next operation in
330                 // the list.
331                 for (value, operator) in iterator {
332                     tokens.push(value);
333                     tokens.push(Token::CloseParen);
334                     tokens.push(operator);
335                 }
336 
337                 tokens.push(Token::OpenParen);
338                 parens += 1;
339                 continue 'outer;
340             }
341         }
342 
343         if values.len() == 0 || operators.len() != values.len() - 1 {
344             return Err(CalcError::UnexpectedEndOfInput);
345         } else {
346             // Removes the last value from the values vector so that they are
347             // even.
348             // It will be re-added at the end once everything's been collected.
349             let last_value: Token = values.pop().unwrap().into();
350             // Add parenthesis to work around the order of operations for infix
351             // arithmetic.
352             for _ in 1..operators.len() {
353                 tokens.push(Token::OpenParen);
354             }
355             // Operators are processed in reverse, while numbers are processed
356             // forwardly.
357             let mut iterator = values
358                 .drain(..)
359                 .map(Token::from)
360                 .zip(operators.drain(..).rev());
361 
362             // The first iteration will not include any closing parenthesis.
363             if let Some((value, operator)) = iterator.next() {
364                 tokens.push(value);
365                 tokens.push(operator);
366             }
367 
368             // Each following iteration will append the second number in the
369             // operation,
370             // followed by a closing parenthesis and the next operation in the
371             // list.
372             for (value, operator) in iterator {
373                 tokens.push(value);
374                 tokens.push(Token::CloseParen);
375                 tokens.push(operator);
376             }
377 
378             tokens.push(last_value);
379             for _ in 0..parens {
380                 tokens.push(Token::CloseParen);
381             }
382             break 'outer;
383         }
384     }
385 
386     Ok(tokens)
387 }
388 
consume_ans_or_atom<I>(input: &mut Peekable<I>) -> Token where I: Iterator<Item = char>,389 fn consume_ans_or_atom<I>(input: &mut Peekable<I>) -> Token
390 where
391     I: Iterator<Item = char>,
392 {
393     let atom = consume_atom(input);
394     if atom.eq_ignore_ascii_case("ans") {
395         Token::Ans
396     } else {
397         Token::Atom(atom)
398     }
399 }
400 
digits<I>(input: &mut Peekable<I>, radix: u32) -> String where I: Iterator<Item = char>,401 fn digits<I>(input: &mut Peekable<I>, radix: u32) -> String
402 where
403     I: Iterator<Item = char>,
404 {
405     let mut number = String::new();
406     while let Some(&c) = input.peek() {
407         if c.is_digit(radix) {
408             number.push(c);
409         } else {
410             break;
411         }
412         input.next();
413     }
414     number
415 }
416 
consume_number<I>(input: &mut Peekable<I>) -> Result<Value, CalcError> where I: Iterator<Item = char>,417 fn consume_number<I>(input: &mut Peekable<I>) -> Result<Value, CalcError>
418 where
419     I: Iterator<Item = char>,
420 {
421     match input.peek() {
422         Some(&'0') => {
423             input.next();
424             match input.peek().cloned() {
425                 Some('x') | Some('X') => {
426                     input.next();
427                     let digits = digits(input, 16);
428                     let num = Integral::from_str_radix(&digits, 16)?;
429                     return Ok(Value::hex(num));
430                 }
431                 Some(c) if c.is_digit(16) || c == '.' => (),
432                 _ => return Ok(Value::dec(0)),
433             }
434         }
435         Some(_) => (),
436         None => return Err(CalcError::UnexpectedEndOfInput),
437     }
438     let mut whole = digits(input, 10);
439     if let Some(&'.') = input.peek() {
440         input.next();
441         let frac = digits(input, 10);
442         if whole.is_empty() && frac.is_empty() {
443             whole = "0".to_string();
444         }
445         let num = [whole, ".".into(), frac]
446             .concat()
447             .parse::<d128>()
448             .map_err(|_| CalcError::InvalidNumber("invalid float".into()))?;
449         Ok(Value::Float(num))
450     } else {
451         let res: Integral = whole.parse()?;
452         Ok(Value::dec(res))
453     }
454 }
455 
456 /// Consume a valid atom. An atom is defined by:
457 /// - Starting with an alphabetic character
458 /// - Consisting of alphanumeric characters or underscores
consume_atom<I: Iterator<Item = char>>(input: &mut Peekable<I>) -> String459 fn consume_atom<I: Iterator<Item = char>>(input: &mut Peekable<I>) -> String {
460     let mut atom = String::new();
461     while let Some(&c) = input.peek() {
462         if c.is_alphanumeric() || c == '_' {
463             atom.push(c);
464             input.next();
465         } else {
466             break;
467         }
468     }
469     atom
470 }
471 
consume_until_new_token<I: Iterator<Item = char>>(input: &mut I) -> String472 fn consume_until_new_token<I: Iterator<Item = char>>(input: &mut I) -> String {
473     input
474         .take_while(|c| {
475             !(c.is_whitespace() || c.is_operator() || c.is_digit(10))
476         })
477         .collect()
478 }
479 
480 #[cfg(test)]
481 mod tests {
482     use super::*;
483 
484     #[test]
normal()485     fn normal() {
486         let line = "(3 + 7) >> 10 * (7 % 2)";
487         let expected = vec![
488             Token::OpenParen,
489             Token::Number(Value::dec(3)),
490             Token::Plus,
491             Token::Number(Value::dec(7)),
492             Token::CloseParen,
493             Token::BitWiseRShift,
494             Token::Number(Value::dec(10)),
495             Token::Multiply,
496             Token::OpenParen,
497             Token::Number(Value::dec(7)),
498             Token::Modulo,
499             Token::Number(Value::dec(2)),
500             Token::CloseParen,
501         ];
502         assert_eq!(tokenize(line), Ok(expected));
503     }
504 
505     #[test]
zero()506     fn zero() {
507         let line = "0 + 0. + 0";
508         let expected = vec![
509             Token::Number(Value::dec(0)),
510             Token::Plus,
511             Token::Number(Value::Float(d128::from(0))),
512             Token::Plus,
513             Token::Number(Value::dec(0)),
514         ];
515         assert_eq!(tokenize(line), Ok(expected));
516     }
517 
518     #[test]
dice()519     fn dice() {
520         let line = "3d6";
521         let expected = vec![
522             Token::Number(Value::dec(3)),
523             Token::Dice,
524             Token::Number(Value::dec(6)),
525         ];
526         assert_eq!(tokenize(line), Ok(expected));
527     }
528 
529     #[test]
dice_polish()530     fn dice_polish() {
531         let line = "d 3 6";
532         let expected = vec![
533             Token::Number(Value::dec(3)),
534             Token::Dice,
535             Token::Number(Value::dec(6)),
536         ];
537         assert_eq!(tokenize_polish(line), Ok(expected));
538     }
539 
540     #[test]
ans()541     fn ans() {
542         let line = "ans*3";
543         let expected =
544             vec![Token::Ans, Token::Multiply, Token::Number(Value::dec(3))];
545         assert_eq!(tokenize(line), Ok(expected));
546     }
547 
548     #[test]
ans_polish()549     fn ans_polish() {
550         let line = "* ans 3";
551         let expected =
552             vec![Token::Ans, Token::Multiply, Token::Number(Value::dec(3))];
553         assert_eq!(tokenize_polish(line), Ok(expected));
554     }
555 
556     #[test]
ans_subtract_ans_polish()557     fn ans_subtract_ans_polish() {
558         let line = "- ans ans";
559         let expected = vec![Token::Ans, Token::Minus, Token::Ans];
560         assert_eq!(tokenize_polish(line), Ok(expected));
561     }
562 
563     #[test]
function_chaining()564     fn function_chaining() {
565         let line = "log 4 / log 2";
566         let expected = vec![
567             Token::Atom("log".into()),
568             Token::Number(Value::dec(4)),
569             Token::Divide,
570             Token::Atom("log".into()),
571             Token::Number(Value::dec(2)),
572         ];
573         assert_eq!(tokenize(line), Ok(expected));
574     }
575 
576     #[test]
hexadecimals()577     fn hexadecimals() {
578         let line = "0xDEADBEEF | 0xC0FFEE";
579         let expected = vec![
580             Token::Number(Value::hex(0xDEADBEEF as i64)),
581             Token::BitWiseOr,
582             Token::Number(Value::hex(0xC0FFEE)),
583         ];
584         assert_eq!(tokenize(line), Ok(expected));
585     }
586 }
587