1 // Copyright 2011 - 2014 Brian Marshall. All rights reserved.
2 //
3 // Use of this source code is governed by the BSD 2-Clause License that can be
4 // found in the LICENSE file.
5 
6 #include <ctype.h>
7 #include <stdbool.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdint.h>
12 #include "bitwise.h"
13 #include "shunting-yard.h"
14 #include "stack.h"
15 
16 typedef enum {
17 	TOKEN_NONE,
18 	TOKEN_UNKNOWN,
19 	TOKEN_OPEN_PARENTHESIS,
20 	TOKEN_CLOSE_PARENTHESIS,
21 	TOKEN_OPERATOR,
22 	TOKEN_NUMBER,
23 	TOKEN_IDENTIFIER
24 } TokenType;
25 
26 typedef struct {
27 	TokenType type;
28 	char *value;
29 } Token;
30 
31 typedef enum {
32 	OPERATOR_OTHER,
33 	OPERATOR_UNARY,
34 	OPERATOR_BINARY
35 } OperatorArity;
36 
37 typedef enum {
38 	OPERATOR_NONE,
39 	OPERATOR_LEFT,
40 	OPERATOR_RIGHT
41 } OperatorAssociativity;
42 
43 typedef struct {
44 	char *symbol;
45 	int op_len;
46 	int precedence;
47 	OperatorArity arity;
48 	OperatorAssociativity associativity;
49 } Operator;
50 
51 static const Token NO_TOKEN = {TOKEN_NONE, NULL};
52 
53 static const Operator OPERATORS[] = {
54 	{"!", 1, 1, OPERATOR_UNARY,  OPERATOR_RIGHT},
55 	{"~", 1, 1, OPERATOR_UNARY,  OPERATOR_RIGHT},
56 	{"*", 1, 2, OPERATOR_BINARY, OPERATOR_LEFT},
57 	{"/", 1, 2, OPERATOR_BINARY, OPERATOR_LEFT},
58 	{"%", 1, 2, OPERATOR_BINARY, OPERATOR_LEFT},
59 	{"+", 1, 3, OPERATOR_BINARY, OPERATOR_LEFT},
60 	{"-", 1, 3, OPERATOR_BINARY, OPERATOR_LEFT},
61 	{">>", 2, 4, OPERATOR_BINARY, OPERATOR_LEFT},
62 	{"<<", 2, 4, OPERATOR_BINARY, OPERATOR_LEFT},
63 	{"&", 1, 5, OPERATOR_BINARY, OPERATOR_LEFT},
64 	{"^", 1, 5, OPERATOR_BINARY, OPERATOR_LEFT},
65 	{"|", 1, 6, OPERATOR_BINARY, OPERATOR_LEFT},
66 	{"(", 1, 7, OPERATOR_OTHER,  OPERATOR_NONE}
67 };
68 
69 // Returns an array of tokens extracted from the expression. The array is
70 // terminated by a token with type `TOKEN_NONE`.
71 static Token *tokenize(const char *expression);
72 
73 // Parses a tokenized expression.
74 static Status parse(const Token *tokens, Stack **operands, Stack **operators,
75                     Stack **functions);
76 
77 // Pushes an operator to the stack after applying operators with a higher
78 // precedence.
79 static Status push_operator(const Operator *operator, Stack **operands,
80                             Stack **operators);
81 
82 // Pushes the multiplication operator to the stack.
83 static Status push_multiplication(Stack **operands, Stack **operators);
84 
85 // Allocates memory for a double and pushes it to the stack.
86 static void push_num(uint64_t x, Stack **operands);
87 
88 // Pops a double from the stack, frees its memory and returns its value.
89 static uint64_t pop_num(Stack **operands);
90 
91 // Converts a string into a number and pushes it to the stack.
92 static Status push_number(const char *value, Stack **operands);
93 
94 // Converts a constant identifier into its value and pushes it to the stack.
95 static Status push_constant(const char *value, Stack **operands);
96 
97 // Applies an operator to the top one or two operands, depending on if the
98 // operator is unary or binary.
99 static Status apply_operator(const Operator *operator, Stack **operands);
100 
101 // Applies a unary operator to the top operand.
102 static Status apply_unary_operator(const Operator *operator, Stack **operands);
103 
104 // Applies a function to the top operand.
105 static Status apply_function(const char *function, Stack **operands);
106 
107 // Returns the arity of an operator, using the previous token for context.
108 static OperatorArity get_arity(char *symbol, const Token *previous);
109 
110 // Returns a matching operator.
111 static const Operator *get_operator(char *symbol, OperatorArity arity);
112 
shunting_yard(const char * expression,uint64_t * result)113 Status shunting_yard(const char *expression, uint64_t *result)
114 {
115 	Token *tokens = tokenize(expression);
116 	Stack *operands = NULL, *operators = NULL, *functions = NULL;
117 	Status status = parse(tokens, &operands, &operators, &functions);
118 	if (operands)
119 		*result = pop_num(&operands);
120 	else if (status == STATUS_OK)
121 		status = ERROR_NO_INPUT;
122 
123 	for (Token *token = tokens; token->type != TOKEN_NONE; token++)
124 		free(token->value);
125 	free(tokens);
126 	while (operands)
127 		pop_num(&operands);
128 	while (operators)
129 		stack_pop(&operators);
130 	while (functions)
131 		stack_pop(&functions);
132 	return status;
133 }
134 
135 #define MAX_TOKEN_SIZE 64
136 
tokenize(const char * expression)137 Token *tokenize(const char *expression)
138 {
139 	int length = 0;
140 	Token *tokens = malloc(sizeof * tokens);
141 	const char *c = expression;
142 	while (*c) {
143 		char cur_token[MAX_TOKEN_SIZE];
144 		Token token = {TOKEN_UNKNOWN, NULL};
145 
146 		if (*c == '(')
147 			token.type = TOKEN_OPEN_PARENTHESIS;
148 		else if (*c == ')')
149 			token.type = TOKEN_CLOSE_PARENTHESIS;
150 		else if (!strncmp("<<", c, 2) || !strncmp(">>", c, 2)) {
151 			token.type = TOKEN_OPERATOR;
152 			token.value = strndup(c, 2);
153 		} else if (strchr("~!^*/%+-|&", *c)) {
154 			token.type = TOKEN_OPERATOR;
155 			token.value = strndup(c, 1);
156 		} else if (!strncmp("bit", c, 3) || !strncmp("BIT", c, 3)) {
157 			token.value = strndup(c, 3);
158 			token.type = TOKEN_IDENTIFIER;
159 		} else if (sscanf(c, "%[xX0-9a-fA-F.]", cur_token)) {
160 			token.type = TOKEN_NUMBER;
161 			token.value = strdup(cur_token);
162 		} else if (sscanf(c, "%[A-Za-z$]", cur_token)) {
163 			token.type = TOKEN_IDENTIFIER;
164 			token.value = strdup(cur_token);
165 		}
166 
167 		if (!isspace(*c)) {
168 			tokens = realloc(tokens, sizeof * tokens *
169 					 (++length + 1));
170 			tokens[length - 1] = token;
171 		}
172 		c += token.value ? strlen(token.value) : 1;
173 	}
174 	tokens[length] = NO_TOKEN;
175 
176 	return tokens;
177 }
178 
parse(const Token * tokens,Stack ** operands,Stack ** operators,Stack ** functions)179 Status parse(const Token *tokens, Stack **operands, Stack **operators,
180              Stack **functions)
181 {
182 	Status status = STATUS_OK;
183 
184 	for (const Token *token = tokens, *previous = &NO_TOKEN, *next = token + 1;
185 	     token->type != TOKEN_NONE; previous = token, token = next++) {
186 		switch (token->type) {
187 		case TOKEN_OPEN_PARENTHESIS:
188 			// Implicit multiplication: "(2)(2)".
189 			if (previous->type == TOKEN_CLOSE_PARENTHESIS)
190 				status = push_multiplication(operands, operators);
191 
192 			stack_push(operators, get_operator("(", OPERATOR_OTHER));
193 			break;
194 
195 		case TOKEN_CLOSE_PARENTHESIS: {
196 			// Apply operators until the previous open parenthesis is found.
197 			bool found_parenthesis = false;
198 			while (*operators && status == STATUS_OK && !found_parenthesis) {
199 				const Operator *operator = stack_pop(operators);
200 				if (!strncmp(operator->symbol, "(", 1))
201 					found_parenthesis = true;
202 				else
203 					status = apply_operator(operator, operands);
204 			}
205 			if (!found_parenthesis)
206 				status = ERROR_CLOSE_PARENTHESIS;
207 			else if (*functions)
208 				status = apply_function(stack_pop(functions), operands);
209 			break;
210 		}
211 
212 		case TOKEN_OPERATOR:
213 			status = push_operator(
214 			             get_operator(token->value,
215 			                          get_arity(token->value, previous)),
216 			             operands, operators);
217 			break;
218 
219 		case TOKEN_NUMBER:
220 			if (previous->type == TOKEN_CLOSE_PARENTHESIS ||
221 			    previous->type == TOKEN_NUMBER ||
222 			    previous->type == TOKEN_IDENTIFIER)
223 				status = ERROR_SYNTAX;
224 			else {
225 				status = push_number(token->value, operands);
226 
227 				// Implicit multiplication: "2(2)" or "2a".
228 				if (next->type == TOKEN_OPEN_PARENTHESIS ||
229 				    next->type == TOKEN_IDENTIFIER)
230 					status = push_multiplication(operands, operators);
231 			}
232 			break;
233 
234 		case TOKEN_IDENTIFIER:
235 			// The identifier could be either a constant or function.
236 			status = push_constant(token->value, operands);
237 			if ((status == ERROR_UNDEFINED_CONSTANT) &&
238 			     (next->type == TOKEN_OPEN_PARENTHESIS)) {
239 				stack_push(functions, token->value);
240 				status = STATUS_OK;
241 			} else if (next->type == TOKEN_OPEN_PARENTHESIS ||
242 			           next->type == TOKEN_IDENTIFIER) {
243 				// Implicit multiplication: "a(2)" or "a b".
244 				status = push_multiplication(operands, operators);
245 			}
246 			break;
247 
248 		default:
249 			status = ERROR_UNRECOGNIZED;
250 		}
251 		if (status != STATUS_OK)
252 			return status;
253 	}
254 
255 	// Apply all remaining operators.
256 	while (*operators && status == STATUS_OK) {
257 		const Operator *operator = stack_pop(operators);
258 		if (!strncmp(operator->symbol, "(", 1))
259 			status = ERROR_OPEN_PARENTHESIS;
260 		else
261 			status = apply_operator(operator, operands);
262 	}
263 	return status;
264 }
265 
push_operator(const Operator * operator,Stack ** operands,Stack ** operators)266 Status push_operator(const Operator *operator, Stack **operands,
267                      Stack **operators)
268 {
269 	if (!operator)
270 		return ERROR_SYNTAX;
271 
272 	Status status = STATUS_OK;
273 	while (*operators && status == STATUS_OK) {
274 		const Operator *stack_operator = stack_top(*operators);
275 		if (operator->arity == OPERATOR_UNARY ||
276 		    operator->precedence < stack_operator->precedence ||
277 		    (operator->associativity == OPERATOR_RIGHT &&
278 		     operator->precedence == stack_operator->precedence))
279 			break;
280 
281 		status = apply_operator(stack_pop(operators), operands);
282 	}
283 	stack_push(operators, operator);
284 	return status;
285 }
286 
push_multiplication(Stack ** operands,Stack ** operators)287 Status push_multiplication(Stack **operands, Stack **operators)
288 {
289 	return push_operator(get_operator("*", OPERATOR_BINARY), operands,
290 	                     operators);
291 }
292 
push_num(uint64_t x,Stack ** operands)293 void push_num(uint64_t x, Stack **operands)
294 {
295 	uint64_t *pointer = malloc(sizeof * pointer);
296 	*pointer = x;
297 	stack_push(operands, pointer);
298 }
299 
pop_num(Stack ** operands)300 uint64_t pop_num(Stack **operands)
301 {
302 	const uint64_t *pointer = stack_pop(operands);
303 	uint64_t x = *pointer;
304 	free((void *)pointer);
305 	return x;
306 }
307 
push_number(const char * value,Stack ** operands)308 Status push_number(const char *value, Stack **operands)
309 {
310 	uint64_t x;
311 
312 	if (parse_input(value, &x))
313 		return ERROR_SYNTAX;
314 
315 	push_num(x, operands);
316 	return STATUS_OK;
317 }
318 
push_constant(const char * value,Stack ** operands)319 Status push_constant(const char *value, Stack **operands)
320 {
321 	uint64_t x;
322 
323 	if (strncmp(value, "$", 1) == 0)
324 		x = g_val;
325 	else
326 		return ERROR_UNDEFINED_CONSTANT;
327 
328 	push_num(x, operands);
329 	return STATUS_OK;
330 }
331 
apply_operator(const Operator * operator,Stack ** operands)332 Status apply_operator(const Operator *operator, Stack **operands)
333 {
334 	if (!operator || !*operands)
335 		return ERROR_SYNTAX;
336 	if (operator->arity == OPERATOR_UNARY)
337 		return apply_unary_operator(operator, operands);
338 
339 	uint64_t y = pop_num(operands);
340 	if (!*operands)
341 		return ERROR_SYNTAX;
342 	uint64_t x = pop_num(operands);
343 	Status status = STATUS_OK;
344 	switch (*operator->symbol) {
345 	case '*':
346 		x = x * y;
347 		break;
348 	case '/':
349 		x = x / y;
350 		break;
351 	case '%':
352 		x = x % y;
353 		break;
354 	case '+':
355 		x = x + y;
356 		break;
357 	case '-':
358 		x = x - y;
359 		break;
360 	case '>':
361 		x = x >> y;
362 		break;
363 	case '<':
364 		x = x << y;
365 		break;
366 	case '|':
367 		x = x | y;
368 		break;
369 	case '&':
370 		x = x & y;
371 		break;
372 	case '^':
373 		x = x ^ y;
374 		break;
375 	default:
376 		return ERROR_UNRECOGNIZED;
377 	}
378 	push_num(x, operands);
379 	return status;
380 }
381 
apply_unary_operator(const Operator * operator,Stack ** operands)382 Status apply_unary_operator(const Operator *operator, Stack **operands)
383 {
384 	uint64_t x = pop_num(operands);
385 	switch (*operator->symbol) {
386 	case '+':
387 		break;
388 	case '-':
389 		x = -x;
390 		break;
391 	case '~':
392 		x = ~x;
393 		break;
394 	case '!':
395 		x = !x;
396 		break;
397 	default:
398 		return ERROR_UNRECOGNIZED;
399 	}
400 	push_num(x, operands);
401 	return STATUS_OK;
402 }
403 
apply_function(const char * function,Stack ** operands)404 Status apply_function(const char *function, Stack **operands)
405 {
406 	if (!*operands)
407 		return ERROR_FUNCTION_ARGUMENTS;
408 
409 	uint64_t x = pop_num(operands);
410 
411 	if (strcasecmp(function, "bit") == 0)
412 		x = 1 << x;
413 	else
414 		return ERROR_UNDEFINED_FUNCTION;
415 
416 	push_num(x, operands);
417 	return STATUS_OK;
418 }
419 
get_arity(char * symbol,const Token * previous)420 OperatorArity get_arity(char *symbol, const Token *previous)
421 {
422 	if (*symbol == '!' || previous->type == TOKEN_NONE ||
423 	    previous->type == TOKEN_OPEN_PARENTHESIS ||
424 	    (previous->type == TOKEN_OPERATOR && *previous->value != '!'))
425 		return OPERATOR_UNARY;
426 	return OPERATOR_BINARY;
427 }
428 
get_operator(char * symbol,OperatorArity arity)429 const Operator *get_operator(char *symbol, OperatorArity arity)
430 {
431 	for (size_t i = 0; i < sizeof OPERATORS / sizeof OPERATORS[0]; i++) {
432 		if (!strncmp(OPERATORS[i].symbol, symbol, OPERATORS[i].op_len) &&
433 		    OPERATORS[i].arity == arity)
434 			return &OPERATORS[i];
435 	}
436 
437 	LOG("couldn't find %s operator\n", symbol);
438 	return NULL;
439 }
440