1 // Copyright 2011 - 2014 Brian Marshall. All rights reserved.
2 //
3 // Use of this source code is governed by the BSD 2-Clause License that can be
4 // found in the LICENSE file.
5
6 #include <ctype.h>
7 #include <stdbool.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdint.h>
12 #include "bitwise.h"
13 #include "shunting-yard.h"
14 #include "stack.h"
15
16 typedef enum {
17 TOKEN_NONE,
18 TOKEN_UNKNOWN,
19 TOKEN_OPEN_PARENTHESIS,
20 TOKEN_CLOSE_PARENTHESIS,
21 TOKEN_OPERATOR,
22 TOKEN_NUMBER,
23 TOKEN_IDENTIFIER
24 } TokenType;
25
26 typedef struct {
27 TokenType type;
28 char *value;
29 } Token;
30
31 typedef enum {
32 OPERATOR_OTHER,
33 OPERATOR_UNARY,
34 OPERATOR_BINARY
35 } OperatorArity;
36
37 typedef enum {
38 OPERATOR_NONE,
39 OPERATOR_LEFT,
40 OPERATOR_RIGHT
41 } OperatorAssociativity;
42
43 typedef struct {
44 char *symbol;
45 int op_len;
46 int precedence;
47 OperatorArity arity;
48 OperatorAssociativity associativity;
49 } Operator;
50
51 static const Token NO_TOKEN = {TOKEN_NONE, NULL};
52
53 static const Operator OPERATORS[] = {
54 {"!", 1, 1, OPERATOR_UNARY, OPERATOR_RIGHT},
55 {"~", 1, 1, OPERATOR_UNARY, OPERATOR_RIGHT},
56 {"*", 1, 2, OPERATOR_BINARY, OPERATOR_LEFT},
57 {"/", 1, 2, OPERATOR_BINARY, OPERATOR_LEFT},
58 {"%", 1, 2, OPERATOR_BINARY, OPERATOR_LEFT},
59 {"+", 1, 3, OPERATOR_BINARY, OPERATOR_LEFT},
60 {"-", 1, 3, OPERATOR_BINARY, OPERATOR_LEFT},
61 {">>", 2, 4, OPERATOR_BINARY, OPERATOR_LEFT},
62 {"<<", 2, 4, OPERATOR_BINARY, OPERATOR_LEFT},
63 {"&", 1, 5, OPERATOR_BINARY, OPERATOR_LEFT},
64 {"^", 1, 5, OPERATOR_BINARY, OPERATOR_LEFT},
65 {"|", 1, 6, OPERATOR_BINARY, OPERATOR_LEFT},
66 {"(", 1, 7, OPERATOR_OTHER, OPERATOR_NONE}
67 };
68
69 // Returns an array of tokens extracted from the expression. The array is
70 // terminated by a token with type `TOKEN_NONE`.
71 static Token *tokenize(const char *expression);
72
73 // Parses a tokenized expression.
74 static Status parse(const Token *tokens, Stack **operands, Stack **operators,
75 Stack **functions);
76
77 // Pushes an operator to the stack after applying operators with a higher
78 // precedence.
79 static Status push_operator(const Operator *operator, Stack **operands,
80 Stack **operators);
81
82 // Pushes the multiplication operator to the stack.
83 static Status push_multiplication(Stack **operands, Stack **operators);
84
85 // Allocates memory for a double and pushes it to the stack.
86 static void push_num(uint64_t x, Stack **operands);
87
88 // Pops a double from the stack, frees its memory and returns its value.
89 static uint64_t pop_num(Stack **operands);
90
91 // Converts a string into a number and pushes it to the stack.
92 static Status push_number(const char *value, Stack **operands);
93
94 // Converts a constant identifier into its value and pushes it to the stack.
95 static Status push_constant(const char *value, Stack **operands);
96
97 // Applies an operator to the top one or two operands, depending on if the
98 // operator is unary or binary.
99 static Status apply_operator(const Operator *operator, Stack **operands);
100
101 // Applies a unary operator to the top operand.
102 static Status apply_unary_operator(const Operator *operator, Stack **operands);
103
104 // Applies a function to the top operand.
105 static Status apply_function(const char *function, Stack **operands);
106
107 // Returns the arity of an operator, using the previous token for context.
108 static OperatorArity get_arity(char *symbol, const Token *previous);
109
110 // Returns a matching operator.
111 static const Operator *get_operator(char *symbol, OperatorArity arity);
112
shunting_yard(const char * expression,uint64_t * result)113 Status shunting_yard(const char *expression, uint64_t *result)
114 {
115 Token *tokens = tokenize(expression);
116 Stack *operands = NULL, *operators = NULL, *functions = NULL;
117 Status status = parse(tokens, &operands, &operators, &functions);
118 if (operands)
119 *result = pop_num(&operands);
120 else if (status == STATUS_OK)
121 status = ERROR_NO_INPUT;
122
123 for (Token *token = tokens; token->type != TOKEN_NONE; token++)
124 free(token->value);
125 free(tokens);
126 while (operands)
127 pop_num(&operands);
128 while (operators)
129 stack_pop(&operators);
130 while (functions)
131 stack_pop(&functions);
132 return status;
133 }
134
135 #define MAX_TOKEN_SIZE 64
136
tokenize(const char * expression)137 Token *tokenize(const char *expression)
138 {
139 int length = 0;
140 Token *tokens = malloc(sizeof * tokens);
141 const char *c = expression;
142 while (*c) {
143 char cur_token[MAX_TOKEN_SIZE];
144 Token token = {TOKEN_UNKNOWN, NULL};
145
146 if (*c == '(')
147 token.type = TOKEN_OPEN_PARENTHESIS;
148 else if (*c == ')')
149 token.type = TOKEN_CLOSE_PARENTHESIS;
150 else if (!strncmp("<<", c, 2) || !strncmp(">>", c, 2)) {
151 token.type = TOKEN_OPERATOR;
152 token.value = strndup(c, 2);
153 } else if (strchr("~!^*/%+-|&", *c)) {
154 token.type = TOKEN_OPERATOR;
155 token.value = strndup(c, 1);
156 } else if (!strncmp("bit", c, 3) || !strncmp("BIT", c, 3)) {
157 token.value = strndup(c, 3);
158 token.type = TOKEN_IDENTIFIER;
159 } else if (sscanf(c, "%[xX0-9a-fA-F.]", cur_token)) {
160 token.type = TOKEN_NUMBER;
161 token.value = strdup(cur_token);
162 } else if (sscanf(c, "%[A-Za-z$]", cur_token)) {
163 token.type = TOKEN_IDENTIFIER;
164 token.value = strdup(cur_token);
165 }
166
167 if (!isspace(*c)) {
168 tokens = realloc(tokens, sizeof * tokens *
169 (++length + 1));
170 tokens[length - 1] = token;
171 }
172 c += token.value ? strlen(token.value) : 1;
173 }
174 tokens[length] = NO_TOKEN;
175
176 return tokens;
177 }
178
parse(const Token * tokens,Stack ** operands,Stack ** operators,Stack ** functions)179 Status parse(const Token *tokens, Stack **operands, Stack **operators,
180 Stack **functions)
181 {
182 Status status = STATUS_OK;
183
184 for (const Token *token = tokens, *previous = &NO_TOKEN, *next = token + 1;
185 token->type != TOKEN_NONE; previous = token, token = next++) {
186 switch (token->type) {
187 case TOKEN_OPEN_PARENTHESIS:
188 // Implicit multiplication: "(2)(2)".
189 if (previous->type == TOKEN_CLOSE_PARENTHESIS)
190 status = push_multiplication(operands, operators);
191
192 stack_push(operators, get_operator("(", OPERATOR_OTHER));
193 break;
194
195 case TOKEN_CLOSE_PARENTHESIS: {
196 // Apply operators until the previous open parenthesis is found.
197 bool found_parenthesis = false;
198 while (*operators && status == STATUS_OK && !found_parenthesis) {
199 const Operator *operator = stack_pop(operators);
200 if (!strncmp(operator->symbol, "(", 1))
201 found_parenthesis = true;
202 else
203 status = apply_operator(operator, operands);
204 }
205 if (!found_parenthesis)
206 status = ERROR_CLOSE_PARENTHESIS;
207 else if (*functions)
208 status = apply_function(stack_pop(functions), operands);
209 break;
210 }
211
212 case TOKEN_OPERATOR:
213 status = push_operator(
214 get_operator(token->value,
215 get_arity(token->value, previous)),
216 operands, operators);
217 break;
218
219 case TOKEN_NUMBER:
220 if (previous->type == TOKEN_CLOSE_PARENTHESIS ||
221 previous->type == TOKEN_NUMBER ||
222 previous->type == TOKEN_IDENTIFIER)
223 status = ERROR_SYNTAX;
224 else {
225 status = push_number(token->value, operands);
226
227 // Implicit multiplication: "2(2)" or "2a".
228 if (next->type == TOKEN_OPEN_PARENTHESIS ||
229 next->type == TOKEN_IDENTIFIER)
230 status = push_multiplication(operands, operators);
231 }
232 break;
233
234 case TOKEN_IDENTIFIER:
235 // The identifier could be either a constant or function.
236 status = push_constant(token->value, operands);
237 if ((status == ERROR_UNDEFINED_CONSTANT) &&
238 (next->type == TOKEN_OPEN_PARENTHESIS)) {
239 stack_push(functions, token->value);
240 status = STATUS_OK;
241 } else if (next->type == TOKEN_OPEN_PARENTHESIS ||
242 next->type == TOKEN_IDENTIFIER) {
243 // Implicit multiplication: "a(2)" or "a b".
244 status = push_multiplication(operands, operators);
245 }
246 break;
247
248 default:
249 status = ERROR_UNRECOGNIZED;
250 }
251 if (status != STATUS_OK)
252 return status;
253 }
254
255 // Apply all remaining operators.
256 while (*operators && status == STATUS_OK) {
257 const Operator *operator = stack_pop(operators);
258 if (!strncmp(operator->symbol, "(", 1))
259 status = ERROR_OPEN_PARENTHESIS;
260 else
261 status = apply_operator(operator, operands);
262 }
263 return status;
264 }
265
push_operator(const Operator * operator,Stack ** operands,Stack ** operators)266 Status push_operator(const Operator *operator, Stack **operands,
267 Stack **operators)
268 {
269 if (!operator)
270 return ERROR_SYNTAX;
271
272 Status status = STATUS_OK;
273 while (*operators && status == STATUS_OK) {
274 const Operator *stack_operator = stack_top(*operators);
275 if (operator->arity == OPERATOR_UNARY ||
276 operator->precedence < stack_operator->precedence ||
277 (operator->associativity == OPERATOR_RIGHT &&
278 operator->precedence == stack_operator->precedence))
279 break;
280
281 status = apply_operator(stack_pop(operators), operands);
282 }
283 stack_push(operators, operator);
284 return status;
285 }
286
push_multiplication(Stack ** operands,Stack ** operators)287 Status push_multiplication(Stack **operands, Stack **operators)
288 {
289 return push_operator(get_operator("*", OPERATOR_BINARY), operands,
290 operators);
291 }
292
push_num(uint64_t x,Stack ** operands)293 void push_num(uint64_t x, Stack **operands)
294 {
295 uint64_t *pointer = malloc(sizeof * pointer);
296 *pointer = x;
297 stack_push(operands, pointer);
298 }
299
pop_num(Stack ** operands)300 uint64_t pop_num(Stack **operands)
301 {
302 const uint64_t *pointer = stack_pop(operands);
303 uint64_t x = *pointer;
304 free((void *)pointer);
305 return x;
306 }
307
push_number(const char * value,Stack ** operands)308 Status push_number(const char *value, Stack **operands)
309 {
310 uint64_t x;
311
312 if (parse_input(value, &x))
313 return ERROR_SYNTAX;
314
315 push_num(x, operands);
316 return STATUS_OK;
317 }
318
push_constant(const char * value,Stack ** operands)319 Status push_constant(const char *value, Stack **operands)
320 {
321 uint64_t x;
322
323 if (strncmp(value, "$", 1) == 0)
324 x = g_val;
325 else
326 return ERROR_UNDEFINED_CONSTANT;
327
328 push_num(x, operands);
329 return STATUS_OK;
330 }
331
apply_operator(const Operator * operator,Stack ** operands)332 Status apply_operator(const Operator *operator, Stack **operands)
333 {
334 if (!operator || !*operands)
335 return ERROR_SYNTAX;
336 if (operator->arity == OPERATOR_UNARY)
337 return apply_unary_operator(operator, operands);
338
339 uint64_t y = pop_num(operands);
340 if (!*operands)
341 return ERROR_SYNTAX;
342 uint64_t x = pop_num(operands);
343 Status status = STATUS_OK;
344 switch (*operator->symbol) {
345 case '*':
346 x = x * y;
347 break;
348 case '/':
349 x = x / y;
350 break;
351 case '%':
352 x = x % y;
353 break;
354 case '+':
355 x = x + y;
356 break;
357 case '-':
358 x = x - y;
359 break;
360 case '>':
361 x = x >> y;
362 break;
363 case '<':
364 x = x << y;
365 break;
366 case '|':
367 x = x | y;
368 break;
369 case '&':
370 x = x & y;
371 break;
372 case '^':
373 x = x ^ y;
374 break;
375 default:
376 return ERROR_UNRECOGNIZED;
377 }
378 push_num(x, operands);
379 return status;
380 }
381
apply_unary_operator(const Operator * operator,Stack ** operands)382 Status apply_unary_operator(const Operator *operator, Stack **operands)
383 {
384 uint64_t x = pop_num(operands);
385 switch (*operator->symbol) {
386 case '+':
387 break;
388 case '-':
389 x = -x;
390 break;
391 case '~':
392 x = ~x;
393 break;
394 case '!':
395 x = !x;
396 break;
397 default:
398 return ERROR_UNRECOGNIZED;
399 }
400 push_num(x, operands);
401 return STATUS_OK;
402 }
403
apply_function(const char * function,Stack ** operands)404 Status apply_function(const char *function, Stack **operands)
405 {
406 if (!*operands)
407 return ERROR_FUNCTION_ARGUMENTS;
408
409 uint64_t x = pop_num(operands);
410
411 if (strcasecmp(function, "bit") == 0)
412 x = 1 << x;
413 else
414 return ERROR_UNDEFINED_FUNCTION;
415
416 push_num(x, operands);
417 return STATUS_OK;
418 }
419
get_arity(char * symbol,const Token * previous)420 OperatorArity get_arity(char *symbol, const Token *previous)
421 {
422 if (*symbol == '!' || previous->type == TOKEN_NONE ||
423 previous->type == TOKEN_OPEN_PARENTHESIS ||
424 (previous->type == TOKEN_OPERATOR && *previous->value != '!'))
425 return OPERATOR_UNARY;
426 return OPERATOR_BINARY;
427 }
428
get_operator(char * symbol,OperatorArity arity)429 const Operator *get_operator(char *symbol, OperatorArity arity)
430 {
431 for (size_t i = 0; i < sizeof OPERATORS / sizeof OPERATORS[0]; i++) {
432 if (!strncmp(OPERATORS[i].symbol, symbol, OPERATORS[i].op_len) &&
433 OPERATORS[i].arity == arity)
434 return &OPERATORS[i];
435 }
436
437 LOG("couldn't find %s operator\n", symbol);
438 return NULL;
439 }
440