xref: /freebsd/usr.bin/dtc/input_buffer.cc (revision a0ee8cc6)
1 /*-
2  * Copyright (c) 2013 David Chisnall
3  * All rights reserved.
4  *
5  * This software was developed by SRI International and the University of
6  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7  * ("CTSRD"), as part of the DARPA CRASH research programme.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  */
32 
33 #include "input_buffer.hh"
34 #include <ctype.h>
35 #include <limits.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <functional>
41 #ifndef NDEBUG
42 #include <iostream>
43 #endif
44 
45 
46 #include <sys/stat.h>
47 #include <sys/mman.h>
48 #include <assert.h>
49 
50 #ifndef MAP_PREFAULT_READ
51 #define MAP_PREFAULT_READ 0
52 #endif
53 
54 namespace dtc
55 {
56 void
57 input_buffer::skip_spaces()
58 {
59 	if (cursor >= size) { return; }
60 	if (cursor < 0) { return; }
61 	char c = buffer[cursor];
62 	while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
63 	       || (c == '\v') || (c == '\r'))
64 	{
65 		cursor++;
66 		if (cursor > size)
67 		{
68 			c = '\0';
69 		}
70 		else
71 		{
72 			c = buffer[cursor];
73 		}
74 	}
75 }
76 
77 input_buffer
78 input_buffer::buffer_from_offset(int offset, int s)
79 {
80 	if (s == 0)
81 	{
82 		s = size - offset;
83 	}
84 	if (offset > size)
85 	{
86 		return input_buffer();
87 	}
88 	if (s > (size-offset))
89 	{
90 		return input_buffer();
91 	}
92 	return input_buffer(&buffer[offset], s);
93 }
94 
95 bool
96 input_buffer::consume(const char *str)
97 {
98 	int len = strlen(str);
99 	if (len > size - cursor)
100 	{
101 		return false;
102 	}
103 	else
104 	{
105 		for (int i=0 ; i<len ; ++i)
106 		{
107 			if (str[i] != buffer[cursor + i])
108 			{
109 				return false;
110 			}
111 		}
112 		cursor += len;
113 		return true;
114 	}
115 	return false;
116 }
117 
118 bool
119 input_buffer::consume_integer(unsigned long long &outInt)
120 {
121 	// The first character must be a digit.  Hex and octal strings
122 	// are prefixed by 0 and 0x, respectively.
123 	if (!isdigit((*this)[0]))
124 	{
125 		return false;
126 	}
127 	char *end=0;
128 	outInt = strtoull(&buffer[cursor], &end, 0);
129 	if (end == &buffer[cursor])
130 	{
131 		return false;
132 	}
133 	cursor = end - buffer;
134 	return true;
135 }
136 
137 namespace {
138 
139 /**
140  * Convenience typedef for the type that we use for all values.
141  */
142 typedef unsigned long long valty;
143 
144 /**
145  * Expression tree currently being parsed.
146  */
147 struct expression
148 {
149 	/**
150 	 * Evaluate this node, taking into account operator precedence.
151 	 */
152 	virtual valty operator()() = 0;
153 	/**
154 	 * Returns the precedence of this node.  Lower values indicate higher
155 	 * precedence.
156 	 */
157 	virtual int precedence() = 0;
158 	virtual ~expression() {}
159 #ifndef NDEBUG
160 	/**
161 	 * Dumps this expression to `std::cerr`, appending a newline if `nl` is
162 	 * `true`.
163 	 */
164 	void dump(bool nl=false)
165 	{
166 		if (this == nullptr)
167 		{
168 			std::cerr << "{nullptr}\n";
169 			return;
170 		}
171 		dump_impl();
172 		if (nl)
173 		{
174 			std::cerr << '\n';
175 		}
176 	}
177 	private:
178 	/**
179 	 * Method that sublcasses override to implement the behaviour of `dump()`.
180 	 */
181 	virtual void dump_impl() = 0;
182 #endif
183 };
184 
185 /**
186  * Expression wrapping a single integer.  Leaf nodes in the expression tree.
187  */
188 class terminal_expr : public expression
189 {
190 	/**
191 	 * The value that this wraps.
192 	 */
193 	valty val;
194 	/**
195 	 * Evaluate.  Trivially returns the value that this class wraps.
196 	 */
197 	valty operator()() override
198 	{
199 		return val;
200 	}
201 	int precedence() override
202 	{
203 		return 0;
204 	}
205 	public:
206 	/**
207 	 * Constructor.
208 	 */
209 	terminal_expr(valty v) : val(v) {}
210 #ifndef NDEBUG
211 	void dump_impl() override { std::cerr << val; }
212 #endif
213 };
214 
215 /**
216  * Parenthetical expression.  Exists to make the contents opaque.
217  */
218 struct paren_expression : public expression
219 {
220 	/**
221 	 * The expression within the parentheses.
222 	 */
223 	expression_ptr subexpr;
224 	/**
225 	 * Constructor.  Takes the child expression as the only argument.
226 	 */
227 	paren_expression(expression_ptr p) : subexpr(std::move(p)) {}
228 	int precedence() override
229 	{
230 		return 0;
231 	}
232 	/**
233 	 * Evaluate - just forwards to the underlying expression.
234 	 */
235 	valty operator()() override
236 	{
237 		return (*subexpr)();
238 	}
239 #ifndef NDEBUG
240 	void dump_impl() override
241 	{
242 		std::cerr << " (";
243 		subexpr->dump();
244 		std::cerr << ") ";
245 	}
246 #endif
247 };
248 
249 /**
250  * Template class for unary operators.  The `OpChar` template parameter is
251  * solely for debugging and makes it easy to print the expression.  The `Op`
252  * template parameter is a function object that implements the operator that
253  * this class provides.  Most of these are provided by the `<functional>`
254  * header.
255  */
256 template<char OpChar, class Op>
257 class unary_operator : public expression
258 {
259 	/**
260 	 * The subexpression for this unary operator.
261 	 */
262 	expression_ptr subexpr;
263 	valty operator()() override
264 	{
265 		Op op;
266 		return op((*subexpr)());
267 	}
268 	/**
269 	 * All unary operators have the same precedence.  They are all evaluated
270 	 * before binary expressions, but after parentheses.
271 	 */
272 	int precedence() override
273 	{
274 		return 3;
275 	}
276 	public:
277 	unary_operator(expression_ptr p) : subexpr(std::move(p)) {}
278 #ifndef NDEBUG
279 	void dump_impl() override
280 	{
281 		std::cerr << OpChar;
282 		subexpr->dump();
283 	}
284 #endif
285 };
286 
287 /**
288  * Abstract base class for binary operators.  Allows the tree to be modified
289  * without knowing what the operations actually are.
290  */
291 struct binary_operator_base : public expression
292 {
293 	/**
294 	 * The left side of the expression.
295 	 */
296 	expression_ptr lhs;
297 	/**
298 	 * The right side of the expression.
299 	 */
300 	expression_ptr rhs;
301 	/**
302 	 * Insert a node somewhere down the path of left children, until it would
303 	 * be preempting something that should execute first.
304 	 */
305 	void insert_left(binary_operator_base *new_left)
306 	{
307 		if (lhs->precedence() < new_left->precedence())
308 		{
309 			new_left->rhs = std::move(lhs);
310 			lhs.reset(new_left);
311 		}
312 		else
313 		{
314 			static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
315 		}
316 	}
317 };
318 
319 /**
320  * Template class for binary operators.  The precedence and the operation are
321  * provided as template parameters.
322  */
323 template<int Precedence, class Op>
324 struct binary_operator : public binary_operator_base
325 {
326 	valty operator()() override
327 	{
328 		Op op;
329 		return op((*lhs)(), (*rhs)());
330 	}
331 	int precedence() override
332 	{
333 		return Precedence;
334 	}
335 #ifdef NDEBUG
336 	/**
337 	 * Constructor.  Takes the name of the operator as an argument, for
338 	 * debugging.  Only stores it in debug mode.
339 	 */
340 	binary_operator(const char *) {}
341 #else
342 	const char *opName;
343 	binary_operator(const char *o) : opName(o) {}
344 	void dump_impl() override
345 	{
346 		lhs->dump();
347 		std::cerr << opName;
348 		rhs->dump();
349 	}
350 #endif
351 };
352 
353 /**
354  * Ternary conditional operators (`cond ? true : false`) are a special case -
355  * there are no other ternary operators.
356  */
357 class ternary_conditional_operator : public expression
358 {
359 	/**
360 	 * The condition for the clause.
361 	 */
362 	expression_ptr cond;
363 	/**
364 	 * The expression that this evaluates to if the condition is true.
365 	 */
366 	expression_ptr lhs;
367 	/**
368 	 * The expression that this evaluates to if the condition is false.
369 	 */
370 	expression_ptr rhs;
371 	valty operator()() override
372 	{
373 		return (*cond)() ? (*lhs)() : (*rhs)();
374 	}
375 	int precedence() override
376 	{
377 		// The actual precedence of a ternary conditional operator is 15, but
378 		// its associativity is the opposite way around to the other operators,
379 		// so we fudge it slightly.
380 		return 3;
381 	}
382 #ifndef NDEBUG
383 	void dump_impl() override
384 	{
385 		cond->dump();
386 		std::cerr << " ? ";
387 		lhs->dump();
388 		std::cerr << " : ";
389 		rhs->dump();
390 	}
391 #endif
392 	public:
393 	ternary_conditional_operator(expression_ptr c,
394 	                             expression_ptr l,
395 	                             expression_ptr r) :
396 		cond(std::move(c)), lhs(std::move(l)), rhs(std::move(r)) {}
397 };
398 
399 template<typename T>
400 struct lshift
401 {
402 	constexpr T operator()(const T &lhs, const T &rhs) const
403 	{
404 		return lhs << rhs;
405 	}
406 };
407 template<typename T>
408 struct rshift
409 {
410 	constexpr T operator()(const T &lhs, const T &rhs) const
411 	{
412 		return lhs >> rhs;
413 	}
414 };
415 template<typename T>
416 struct unary_plus
417 {
418 	constexpr T operator()(const T &val) const
419 	{
420 		return +val;
421 	}
422 };
423 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
424 template<typename T>
425 struct bit_not
426 {
427 	constexpr T operator()(const T &val) const
428 	{
429 		return ~val;
430 	}
431 };
432 
433 } // anonymous namespace
434 
435 
436 expression_ptr input_buffer::parse_binary_expression(expression_ptr lhs)
437 {
438 	next_token();
439 	binary_operator_base *expr = nullptr;
440 	char op = ((*this)[0]);
441 	switch (op)
442 	{
443 		default:
444 			return lhs;
445 		case '+':
446 			expr = new binary_operator<6, std::plus<valty>>("+");
447 			break;
448 		case '-':
449 			expr = new binary_operator<6, std::minus<valty>>("-");
450 			break;
451 		case '%':
452 			expr = new binary_operator<5, std::modulus<valty>>("%");
453 			break;
454 		case '*':
455 			expr = new binary_operator<5, std::multiplies<valty>>("*");
456 			break;
457 		case '/':
458 			expr = new binary_operator<5, std::divides<valty>>("/");
459 			break;
460 		case '<':
461 			cursor++;
462 			switch ((*this)[0])
463 			{
464 				default:
465 					parse_error("Invalid operator");
466 					return nullptr;
467 				case ' ':
468 				case '(':
469 				case '0'...'9':
470 					cursor--;
471 					expr = new binary_operator<8, std::less<valty>>("<");
472 					break;
473 				case '=':
474 					expr = new binary_operator<8, std::less_equal<valty>>("<=");
475 					break;
476 				case '<':
477 					expr = new binary_operator<7, lshift<valty>>("<<");
478 					break;
479 			}
480 			break;
481 		case '>':
482 			cursor++;
483 			switch ((*this)[0])
484 			{
485 				default:
486 					parse_error("Invalid operator");
487 					return nullptr;
488 				case '(':
489 				case ' ':
490 				case '0'...'9':
491 					cursor--;
492 					expr = new binary_operator<8, std::greater<valty>>(">");
493 					break;
494 				case '=':
495 					expr = new binary_operator<8, std::greater_equal<valty>>(">=");
496 					break;
497 				case '>':
498 					expr = new binary_operator<7, rshift<valty>>(">>");
499 					break;
500 					return lhs;
501 			}
502 			break;
503 		case '=':
504 			if ((*this)[1] != '=')
505 			{
506 				parse_error("Invalid operator");
507 				return nullptr;
508 			}
509 			consume('=');
510 			expr = new binary_operator<9, std::equal_to<valty>>("==");
511 			break;
512 		case '!':
513 			if ((*this)[1] != '=')
514 			{
515 				parse_error("Invalid operator");
516 				return nullptr;
517 			}
518 			cursor++;
519 			expr = new binary_operator<9, std::not_equal_to<valty>>("!=");
520 			break;
521 		case '&':
522 			if ((*this)[1] == '&')
523 			{
524 				expr = new binary_operator<13, std::logical_and<valty>>("&&");
525 			}
526 			else
527 			{
528 				expr = new binary_operator<10, std::bit_and<valty>>("&");
529 			}
530 			break;
531 		case '|':
532 			if ((*this)[1] == '|')
533 			{
534 				expr = new binary_operator<12, std::logical_or<valty>>("||");
535 			}
536 			else
537 			{
538 				expr = new binary_operator<14, std::bit_or<valty>>("|");
539 			}
540 			break;
541 		case '?':
542 		{
543 			consume('?');
544 			expression_ptr true_case = parse_expression();
545 			next_token();
546 			if (!true_case || !consume(':'))
547 			{
548 				parse_error("Expected : in ternary conditional operator");
549 				return nullptr;
550 			}
551 			expression_ptr false_case = parse_expression();
552 			if (!false_case)
553 			{
554 				parse_error("Expected false condition for ternary operator");
555 				return nullptr;
556 			}
557 			return expression_ptr(new ternary_conditional_operator(std::move(lhs),
558 						std::move(true_case), std::move(false_case)));
559 		}
560 	}
561 	cursor++;
562 	next_token();
563 	expression_ptr e(expr);
564 	expression_ptr rhs(parse_expression());
565 	if (!rhs)
566 	{
567 		return nullptr;
568 	}
569 	expr->lhs = std::move(lhs);
570 	if (rhs->precedence() < expr->precedence())
571 	{
572 		expr->rhs = std::move(rhs);
573 	}
574 	else
575 	{
576 		// If we're a normal left-to-right expression, then we need to insert
577 		// this as the far-left child node of the rhs expression
578 		binary_operator_base *rhs_op =
579 			static_cast<binary_operator_base*>(rhs.get());
580 		rhs_op->insert_left(expr);
581 		e.release();
582 		return rhs;
583 	}
584 	return e;
585 }
586 
587 expression_ptr input_buffer::parse_expression(bool stopAtParen)
588 {
589 	next_token();
590 	unsigned long long leftVal;
591 	expression_ptr lhs;
592 	switch ((*this)[0])
593 	{
594 		case '0'...'9':
595 			if (!consume_integer(leftVal))
596 			{
597 				return nullptr;
598 			}
599 			lhs.reset(new terminal_expr(leftVal));
600 			break;
601 		case '(':
602 		{
603 			consume('(');
604 			expression_ptr &&subexpr = parse_expression();
605 			if (!subexpr)
606 			{
607 				return nullptr;
608 			}
609 			lhs.reset(new paren_expression(std::move(subexpr)));
610 			if (!consume(')'))
611 			{
612 				return nullptr;
613 			}
614 			if (stopAtParen)
615 			{
616 				return lhs;
617 			}
618 			break;
619 		}
620 		case '+':
621 		{
622 			consume('+');
623 			expression_ptr &&subexpr = parse_expression();
624 			if (!subexpr)
625 			{
626 				return nullptr;
627 			}
628 			lhs.reset(new unary_operator<'+', unary_plus<valty>>(std::move(subexpr)));
629 			break;
630 		}
631 		case '-':
632 		{
633 			consume('-');
634 			expression_ptr &&subexpr = parse_expression();
635 			if (!subexpr)
636 			{
637 				return nullptr;
638 			}
639 			lhs.reset(new unary_operator<'-', std::negate<valty>>(std::move(subexpr)));
640 			break;
641 		}
642 		case '!':
643 		{
644 			consume('!');
645 			expression_ptr &&subexpr = parse_expression();
646 			if (!subexpr)
647 			{
648 				return nullptr;
649 			}
650 			lhs.reset(new unary_operator<'!', std::logical_not<valty>>(std::move(subexpr)));
651 			break;
652 		}
653 		case '~':
654 		{
655 			consume('~');
656 			expression_ptr &&subexpr = parse_expression();
657 			if (!subexpr)
658 			{
659 				return nullptr;
660 			}
661 			lhs.reset(new unary_operator<'~', bit_not<valty>>(std::move(subexpr)));
662 			break;
663 		}
664 	}
665 	if (!lhs)
666 	{
667 		return nullptr;
668 	}
669 	return parse_binary_expression(std::move(lhs));
670 }
671 
672 bool
673 input_buffer::consume_integer_expression(unsigned long long &outInt)
674 {
675 	switch ((*this)[0])
676 	{
677 		case '(':
678 		{
679 			expression_ptr e(parse_expression(true));
680 			if (!e)
681 			{
682 				return false;
683 			}
684 			outInt = (*e)();
685 			return true;
686 		}
687 		case '0'...'9':
688 			return consume_integer(outInt);
689 		default:
690 			return false;
691 	}
692 }
693 
694 bool
695 input_buffer::consume_hex_byte(uint8_t &outByte)
696 {
697 	if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
698 	{
699 		return false;
700 	}
701 	outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
702 	cursor += 2;
703 	return true;
704 }
705 
706 input_buffer&
707 input_buffer::next_token()
708 {
709 	int start;
710 	do {
711 		start = cursor;
712 		skip_spaces();
713 		// Parse /* comments
714 		if ((*this)[0] == '/' && (*this)[1] == '*')
715 		{
716 			// eat the start of the comment
717 			++(*this);
718 			++(*this);
719 			do {
720 				// Find the ending * of */
721 				while ((**this != '\0') && (**this != '*'))
722 				{
723 					++(*this);
724 				}
725 				// Eat the *
726 				++(*this);
727 			} while ((**this != '\0') && (**this != '/'));
728 			// Eat the /
729 			++(*this);
730 		}
731 		// Parse // comments
732 		if (((*this)[0] == '/' && (*this)[1] == '/'))
733 		{
734 			// eat the start of the comment
735 			++(*this);
736 			++(*this);
737 			// Find the ending of the line
738 			while (**this != '\n')
739 			{
740 				++(*this);
741 			}
742 			// Eat the \n
743 			++(*this);
744 		}
745 	} while (start != cursor);
746 	return *this;
747 }
748 
749 void
750 input_buffer::parse_error(const char *msg)
751 {
752 	int line_count = 1;
753 	int line_start = 0;
754 	int line_end = cursor;
755 	for (int i=cursor ; i>0 ; --i)
756 	{
757 		if (buffer[i] == '\n')
758 		{
759 			line_count++;
760 			if (line_start == 0)
761 			{
762 				line_start = i+1;
763 			}
764 		}
765 	}
766 	for (int i=cursor+1 ; i<size ; ++i)
767 	{
768 		if (buffer[i] == '\n')
769 		{
770 			line_end = i;
771 			break;
772 		}
773 	}
774 	fprintf(stderr, "Error on line %d: %s\n", line_count, msg);
775 	fwrite(&buffer[line_start], line_end-line_start, 1, stderr);
776 	putc('\n', stderr);
777 	for (int i=0 ; i<(cursor-line_start) ; ++i)
778 	{
779 		char c = (buffer[i+line_start] == '\t') ? '\t' : ' ';
780 		putc(c, stderr);
781 	}
782 	putc('^', stderr);
783 	putc('\n', stderr);
784 }
785 #ifndef NDEBUG
786 void
787 input_buffer::dump()
788 {
789 	fprintf(stderr, "Current cursor: %d\n", cursor);
790 	fwrite(&buffer[cursor], size-cursor, 1, stderr);
791 }
792 #endif
793 
794 mmap_input_buffer::mmap_input_buffer(int fd) : input_buffer(0, 0)
795 {
796 	struct stat sb;
797 	if (fstat(fd, &sb))
798 	{
799 		perror("Failed to stat file");
800 	}
801 	size = sb.st_size;
802 	buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
803 			MAP_PREFAULT_READ, fd, 0);
804 	if (buffer == MAP_FAILED)
805 	{
806 		perror("Failed to mmap file");
807 		exit(EXIT_FAILURE);
808 	}
809 }
810 
811 mmap_input_buffer::~mmap_input_buffer()
812 {
813 	if (buffer != 0)
814 	{
815 		munmap((void*)buffer, size);
816 	}
817 }
818 
819 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
820 {
821 	int c;
822 	while ((c = fgetc(stdin)) != EOF)
823 	{
824 		b.push_back(c);
825 	}
826 	buffer = b.data();
827 	size = b.size();
828 }
829 
830 } // namespace dtc
831 
832