1 use crate::error::CalcError;
2 use crate::error::CalcError::*;
3 use crate::value::{Integral, Value};
4 use decimal::d128;
5 use num::Num;
6 use std::fmt;
7 use std::iter::Peekable;
8
9 /// Tokens used for parsing an arithmetic expression
10 #[derive(Debug, Clone, PartialEq)]
11 pub enum Token {
12 Ans,
13 Plus,
14 Minus,
15 Divide,
16 Multiply,
17 Exponent,
18 Square,
19 Cube,
20 BitWiseAnd,
21 BitWiseOr,
22 BitWiseXor,
23 BitWiseNot,
24 BitWiseRShift,
25 BitWiseLShift,
26 Modulo,
27 OpenParen,
28 CloseParen,
29 Dice,
30 Number(Value),
31 Atom(String),
32 }
33
34 impl fmt::Display for Token {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result35 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
36 match *self {
37 Token::Ans => write!(f, "Ans"),
38 Token::Plus => write!(f, "Plus"),
39 Token::Minus => write!(f, "Minus"),
40 Token::Divide => write!(f, "Divide"),
41 Token::Multiply => write!(f, "Multiply"),
42 Token::Exponent => write!(f, "Exponent"),
43 Token::Square => write!(f, "Square"),
44 Token::Cube => write!(f, "Cube"),
45 Token::BitWiseAnd => write!(f, "And"),
46 Token::BitWiseOr => write!(f, "Or"),
47 Token::BitWiseXor => write!(f, "Xor"),
48 Token::BitWiseNot => write!(f, "Not"),
49 Token::BitWiseRShift => write!(f, "RShift"),
50 Token::BitWiseLShift => write!(f, "LShift"),
51 Token::Modulo => write!(f, "Modulo"),
52 Token::OpenParen => write!(f, "OpenParen"),
53 Token::CloseParen => write!(f, "CloseParen"),
54 Token::Dice => write!(f, "Dice"),
55 Token::Number(ref n) => write!(f, "'{}'", n),
56 Token::Atom(ref s) => write!(f, "'{}'", s),
57 }
58 }
59 }
60
61 enum OperatorState {
62 PotentiallyIncomplete,
63 Complete,
64 NotAnOperator,
65 }
66
67 trait IsOperator {
is_operator(self) -> bool68 fn is_operator(self) -> bool;
69 }
70
71 impl IsOperator for char {
is_operator(self) -> bool72 fn is_operator(self) -> bool {
73 match self {
74 '+' | '-' | '/' | '^' | '²' | '³' | '&' | '|' | '~' | '>' | '%'
75 | '(' | ')' | '*' | '<' | 'd' => true,
76 _ => false,
77 }
78 }
79 }
80
81 trait CheckOperator {
check_operator(self) -> OperatorState82 fn check_operator(self) -> OperatorState;
83 }
84
85 impl CheckOperator for char {
check_operator(self) -> OperatorState86 fn check_operator(self) -> OperatorState {
87 match self {
88 '+' | '-' | '/' | '^' | '²' | '³' | '&' | '|' | '~' | '%' | '('
89 | ')' | 'd' => OperatorState::Complete,
90 '*' | '<' | '>' => OperatorState::PotentiallyIncomplete,
91 _ => OperatorState::NotAnOperator,
92 }
93 }
94 }
95
96 trait OperatorMatch {
operator_type(self) -> Option<Token>97 fn operator_type(self) -> Option<Token>;
98 }
99
100 impl OperatorMatch for [char; 2] {
operator_type(self) -> Option<Token>101 fn operator_type(self) -> Option<Token> {
102 if self == ['*', '*'] {
103 Some(Token::Exponent)
104 } else if self == ['<', '<'] {
105 Some(Token::BitWiseLShift)
106 } else if self == ['>', '>'] {
107 Some(Token::BitWiseRShift)
108 } else {
109 None
110 }
111 }
112 }
113
114 impl OperatorMatch for char {
operator_type(self) -> Option<Token>115 fn operator_type(self) -> Option<Token> {
116 match self {
117 '+' => Some(Token::Plus),
118 '-' => Some(Token::Minus),
119 '/' => Some(Token::Divide),
120 '*' => Some(Token::Multiply),
121 '^' => Some(Token::BitWiseXor),
122 '²' => Some(Token::Square),
123 '³' => Some(Token::Cube),
124 '&' => Some(Token::BitWiseAnd),
125 '|' => Some(Token::BitWiseOr),
126 '~' => Some(Token::BitWiseNot),
127 '%' => Some(Token::Modulo),
128 '(' => Some(Token::OpenParen),
129 ')' => Some(Token::CloseParen),
130 'd' => Some(Token::Dice),
131 _ => None,
132 }
133 }
134 }
135
136 /// Tokenizes a mathematical expression written written with the standard infix
137 /// notation.
138 ///
139 /// Returns a vector of `Token`s in the infix format, if the supplied
140 /// expression is valid. This
141 /// vector can then be pased into the `parse` function to be evaluated.
tokenize(input: &str) -> Result<Vec<Token>, CalcError>142 pub fn tokenize(input: &str) -> Result<Vec<Token>, CalcError> {
143 let mut tokens = Vec::with_capacity(input.len());
144 let mut chars = input.chars().peekable();
145
146 while let Some(&c) = chars.peek() {
147 match c.check_operator() {
148 OperatorState::Complete => {
149 tokens
150 .push(c.operator_type().ok_or_else(|| InvalidOperator(c))?);
151 chars.next();
152 }
153 OperatorState::PotentiallyIncomplete => {
154 chars.next();
155 match chars.peek() {
156 Some(&next_char) if next_char.is_operator() => {
157 tokens.push(
158 [c, next_char]
159 .operator_type()
160 .ok_or_else(|| InvalidOperator(c))?,
161 );
162 chars.next();
163 }
164 _ => {
165 tokens.push(
166 c.operator_type()
167 .ok_or_else(|| InvalidOperator(c))?,
168 );
169 }
170 }
171 }
172 OperatorState::NotAnOperator => {
173 if c.is_whitespace() {
174 chars.next();
175 } else if c.is_alphabetic() {
176 tokens.push(consume_ans_or_atom(&mut chars).into());
177 } else if c.is_digit(16) || c == '.' {
178 tokens.push(Token::Number(consume_number(&mut chars)?));
179 } else {
180 let token_string = consume_until_new_token(&mut chars);
181 return Err(CalcError::UnrecognizedToken(token_string));
182 }
183 }
184 }
185 }
186 Ok(tokens)
187 }
188
189 /// Tokenizes a mathematical expression written with a polish (prefix) notation.
190 ///
191 /// Returns a vector of `Token`s in the infix format, if the supplied
192 /// expression is valid. This
193 /// vector can then be pased into the `parse` function to be evaluated.
tokenize_polish(input: &str) -> Result<Vec<Token>, CalcError>194 pub fn tokenize_polish(input: &str) -> Result<Vec<Token>, CalcError> {
195 // NOTE: This function isn't as efficient as it could be. For sake of
196 // compatibility with the
197 // existing infix parser, this function stores and re-arranges tokens in
198 // the infix format;
199 // adding extra parenthesis so that the order of operations aren't followed.
200
201 // A temporary enum that is used for collecting polish values before they
202 // are converted into
203 // tokens for the infix format.
204 #[derive(Debug)]
205 enum PolishValue {
206 Ans,
207 Atom(String),
208 Number(Value),
209 }
210 impl From<PolishValue> for Token {
211 fn from(polish: PolishValue) -> Token {
212 match polish {
213 PolishValue::Ans => Token::Ans,
214 PolishValue::Atom(atom) => Token::Atom(atom),
215 PolishValue::Number(val) => Token::Number(val),
216 }
217 }
218 }
219 impl From<Token> for PolishValue {
220 fn from(token: Token) -> PolishValue {
221 match token {
222 Token::Ans => PolishValue::Ans,
223 Token::Atom(atom) => PolishValue::Atom(atom),
224 Token::Number(val) => PolishValue::Number(val),
225 _ => unreachable!(),
226 }
227 }
228 }
229
230 let mut chars = input.chars().peekable();
231 let mut operators: Vec<Token> = Vec::with_capacity(input.len() / 4);
232 let mut values: Vec<PolishValue> = Vec::with_capacity(input.len() / 3);
233 let mut tokens = Vec::with_capacity(input.len() / 2);
234 let mut parens = 0;
235
236 'outer: loop {
237 // Collect the operators until a number is found.
238 while let Some(&c) = chars.peek() {
239 match c.check_operator() {
240 OperatorState::Complete => {
241 let token =
242 c.operator_type().ok_or_else(|| InvalidOperator(c))?;
243 if token != Token::OpenParen && token != Token::CloseParen {
244 operators.push(token);
245 }
246 chars.next();
247 }
248 OperatorState::PotentiallyIncomplete => {
249 chars.next();
250 match chars.peek() {
251 Some(&next_char) if next_char.is_operator() => {
252 let token = [c, next_char]
253 .operator_type()
254 .ok_or_else(|| InvalidOperator(c))?;
255 if token != Token::OpenParen
256 && token != Token::CloseParen
257 {
258 operators.push(token);
259 }
260 chars.next();
261 }
262 _ => {
263 let token = c
264 .operator_type()
265 .ok_or_else(|| InvalidOperator(c))?;
266 if token != Token::OpenParen
267 && token != Token::CloseParen
268 {
269 operators.push(token);
270 }
271 }
272 }
273 }
274 OperatorState::NotAnOperator => {
275 if c.is_whitespace() {
276 chars.next();
277 } else if c.is_alphabetic() {
278 values.push(consume_ans_or_atom(&mut chars).into());
279 break;
280 } else if c.is_digit(16) || c == '.' {
281 values.push(PolishValue::Number(consume_number(
282 &mut chars,
283 )?));
284 break;
285 } else {
286 let token_string = consume_until_new_token(&mut chars);
287 return Err(CalcError::UnrecognizedToken(token_string));
288 }
289 }
290 }
291 }
292
293 // Then collect the atoms/numbers, resetting the looping if more
294 // operators are found.
295 while let Some(&c) = chars.peek() {
296 if c.is_alphabetic() {
297 values.push(consume_ans_or_atom(&mut chars).into());
298 } else if c.is_digit(16) || c == '.' {
299 values.push(PolishValue::Number(consume_number(&mut chars)?));
300 } else if c.is_whitespace() || c == ')' {
301 let _ = chars.next();
302 } else if operators.len() != values.len() {
303 // Either too many or too few values were supplied
304 return Err(CalcError::UnexpectedEndOfInput);
305 } else {
306 // This block executes when multiple sets of arithmetic
307 // operations are entered.
308 // Add parenthesis to work around the order of operations for
309 // infix arithmetic.
310 for _ in 1..operators.len() {
311 tokens.push(Token::OpenParen);
312 }
313
314 // Operators are processed in reverse, while numbers are
315 // processed forwardly.
316 let mut iterator = values
317 .drain(..)
318 .map(Token::from)
319 .zip(operators.drain(..).rev());
320
321 // The first iteration will not include any closing parenthesis.
322 if let Some((value, operator)) = iterator.next() {
323 tokens.push(value);
324 tokens.push(operator);
325 }
326
327 // Each following iteration will append the second number in
328 // the operation,
329 // followed by a closing parenthesis and the next operation in
330 // the list.
331 for (value, operator) in iterator {
332 tokens.push(value);
333 tokens.push(Token::CloseParen);
334 tokens.push(operator);
335 }
336
337 tokens.push(Token::OpenParen);
338 parens += 1;
339 continue 'outer;
340 }
341 }
342
343 if values.len() == 0 || operators.len() != values.len() - 1 {
344 return Err(CalcError::UnexpectedEndOfInput);
345 } else {
346 // Removes the last value from the values vector so that they are
347 // even.
348 // It will be re-added at the end once everything's been collected.
349 let last_value: Token = values.pop().unwrap().into();
350 // Add parenthesis to work around the order of operations for infix
351 // arithmetic.
352 for _ in 1..operators.len() {
353 tokens.push(Token::OpenParen);
354 }
355 // Operators are processed in reverse, while numbers are processed
356 // forwardly.
357 let mut iterator = values
358 .drain(..)
359 .map(Token::from)
360 .zip(operators.drain(..).rev());
361
362 // The first iteration will not include any closing parenthesis.
363 if let Some((value, operator)) = iterator.next() {
364 tokens.push(value);
365 tokens.push(operator);
366 }
367
368 // Each following iteration will append the second number in the
369 // operation,
370 // followed by a closing parenthesis and the next operation in the
371 // list.
372 for (value, operator) in iterator {
373 tokens.push(value);
374 tokens.push(Token::CloseParen);
375 tokens.push(operator);
376 }
377
378 tokens.push(last_value);
379 for _ in 0..parens {
380 tokens.push(Token::CloseParen);
381 }
382 break 'outer;
383 }
384 }
385
386 Ok(tokens)
387 }
388
consume_ans_or_atom<I>(input: &mut Peekable<I>) -> Token where I: Iterator<Item = char>,389 fn consume_ans_or_atom<I>(input: &mut Peekable<I>) -> Token
390 where
391 I: Iterator<Item = char>,
392 {
393 let atom = consume_atom(input);
394 if atom.eq_ignore_ascii_case("ans") {
395 Token::Ans
396 } else {
397 Token::Atom(atom)
398 }
399 }
400
digits<I>(input: &mut Peekable<I>, radix: u32) -> String where I: Iterator<Item = char>,401 fn digits<I>(input: &mut Peekable<I>, radix: u32) -> String
402 where
403 I: Iterator<Item = char>,
404 {
405 let mut number = String::new();
406 while let Some(&c) = input.peek() {
407 if c.is_digit(radix) {
408 number.push(c);
409 } else {
410 break;
411 }
412 input.next();
413 }
414 number
415 }
416
consume_number<I>(input: &mut Peekable<I>) -> Result<Value, CalcError> where I: Iterator<Item = char>,417 fn consume_number<I>(input: &mut Peekable<I>) -> Result<Value, CalcError>
418 where
419 I: Iterator<Item = char>,
420 {
421 match input.peek() {
422 Some(&'0') => {
423 input.next();
424 match input.peek().cloned() {
425 Some('x') | Some('X') => {
426 input.next();
427 let digits = digits(input, 16);
428 let num = Integral::from_str_radix(&digits, 16)?;
429 return Ok(Value::hex(num));
430 }
431 Some(c) if c.is_digit(16) || c == '.' => (),
432 _ => return Ok(Value::dec(0)),
433 }
434 }
435 Some(_) => (),
436 None => return Err(CalcError::UnexpectedEndOfInput),
437 }
438 let mut whole = digits(input, 10);
439 if let Some(&'.') = input.peek() {
440 input.next();
441 let frac = digits(input, 10);
442 if whole.is_empty() && frac.is_empty() {
443 whole = "0".to_string();
444 }
445 let num = [whole, ".".into(), frac]
446 .concat()
447 .parse::<d128>()
448 .map_err(|_| CalcError::InvalidNumber("invalid float".into()))?;
449 Ok(Value::Float(num))
450 } else {
451 let res: Integral = whole.parse()?;
452 Ok(Value::dec(res))
453 }
454 }
455
456 /// Consume a valid atom. An atom is defined by:
457 /// - Starting with an alphabetic character
458 /// - Consisting of alphanumeric characters or underscores
consume_atom<I: Iterator<Item = char>>(input: &mut Peekable<I>) -> String459 fn consume_atom<I: Iterator<Item = char>>(input: &mut Peekable<I>) -> String {
460 let mut atom = String::new();
461 while let Some(&c) = input.peek() {
462 if c.is_alphanumeric() || c == '_' {
463 atom.push(c);
464 input.next();
465 } else {
466 break;
467 }
468 }
469 atom
470 }
471
consume_until_new_token<I: Iterator<Item = char>>(input: &mut I) -> String472 fn consume_until_new_token<I: Iterator<Item = char>>(input: &mut I) -> String {
473 input
474 .take_while(|c| {
475 !(c.is_whitespace() || c.is_operator() || c.is_digit(10))
476 })
477 .collect()
478 }
479
480 #[cfg(test)]
481 mod tests {
482 use super::*;
483
484 #[test]
normal()485 fn normal() {
486 let line = "(3 + 7) >> 10 * (7 % 2)";
487 let expected = vec![
488 Token::OpenParen,
489 Token::Number(Value::dec(3)),
490 Token::Plus,
491 Token::Number(Value::dec(7)),
492 Token::CloseParen,
493 Token::BitWiseRShift,
494 Token::Number(Value::dec(10)),
495 Token::Multiply,
496 Token::OpenParen,
497 Token::Number(Value::dec(7)),
498 Token::Modulo,
499 Token::Number(Value::dec(2)),
500 Token::CloseParen,
501 ];
502 assert_eq!(tokenize(line), Ok(expected));
503 }
504
505 #[test]
zero()506 fn zero() {
507 let line = "0 + 0. + 0";
508 let expected = vec![
509 Token::Number(Value::dec(0)),
510 Token::Plus,
511 Token::Number(Value::Float(d128::from(0))),
512 Token::Plus,
513 Token::Number(Value::dec(0)),
514 ];
515 assert_eq!(tokenize(line), Ok(expected));
516 }
517
518 #[test]
dice()519 fn dice() {
520 let line = "3d6";
521 let expected = vec![
522 Token::Number(Value::dec(3)),
523 Token::Dice,
524 Token::Number(Value::dec(6)),
525 ];
526 assert_eq!(tokenize(line), Ok(expected));
527 }
528
529 #[test]
dice_polish()530 fn dice_polish() {
531 let line = "d 3 6";
532 let expected = vec![
533 Token::Number(Value::dec(3)),
534 Token::Dice,
535 Token::Number(Value::dec(6)),
536 ];
537 assert_eq!(tokenize_polish(line), Ok(expected));
538 }
539
540 #[test]
ans()541 fn ans() {
542 let line = "ans*3";
543 let expected =
544 vec![Token::Ans, Token::Multiply, Token::Number(Value::dec(3))];
545 assert_eq!(tokenize(line), Ok(expected));
546 }
547
548 #[test]
ans_polish()549 fn ans_polish() {
550 let line = "* ans 3";
551 let expected =
552 vec![Token::Ans, Token::Multiply, Token::Number(Value::dec(3))];
553 assert_eq!(tokenize_polish(line), Ok(expected));
554 }
555
556 #[test]
ans_subtract_ans_polish()557 fn ans_subtract_ans_polish() {
558 let line = "- ans ans";
559 let expected = vec![Token::Ans, Token::Minus, Token::Ans];
560 assert_eq!(tokenize_polish(line), Ok(expected));
561 }
562
563 #[test]
function_chaining()564 fn function_chaining() {
565 let line = "log 4 / log 2";
566 let expected = vec![
567 Token::Atom("log".into()),
568 Token::Number(Value::dec(4)),
569 Token::Divide,
570 Token::Atom("log".into()),
571 Token::Number(Value::dec(2)),
572 ];
573 assert_eq!(tokenize(line), Ok(expected));
574 }
575
576 #[test]
hexadecimals()577 fn hexadecimals() {
578 let line = "0xDEADBEEF | 0xC0FFEE";
579 let expected = vec![
580 Token::Number(Value::hex(0xDEADBEEF as i64)),
581 Token::BitWiseOr,
582 Token::Number(Value::hex(0xC0FFEE)),
583 ];
584 assert_eq!(tokenize(line), Ok(expected));
585 }
586 }
587