/*
   Copyright (C) 2004 - 2009 by Philippe Plantier <ayin@anathas.org>
   Copyright (C) 2010 - 2018 by Guillaume Melquiond <guillaume.melquiond@gmail.com>
   Part of the Battle for Wesnoth Project https://www.wesnoth.org

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY.

   See the COPYING file for more details.
*/
15 
16 #pragma once
17 
18 //#define DEBUG_TOKENIZER
19 
20 #include "buffered_istream.hpp"
21 
22 #include <istream>
23 #include <string>
24 
/**
 * A single token: its kind (@ref token_type) plus the associated text.
 */
struct token
{
	/** Creates a token of type END with an empty value. */
	token() :
		type(END),
		value()
	{}

	/**
	 * Kind of a token.
	 *
	 * The first four enumerators take the implicit values 0 to 3. The
	 * single-character kinds use the character code of the character they
	 * are named after as their value.
	 *
	 * NOTE(review): what exactly distinguishes STRING, QSTRING,
	 * UNTERMINATED_QSTRING and MISC is decided by the tokenizer
	 * implementation, which is not part of this header.
	 */
	enum token_type
	{
		STRING,
		QSTRING,
		UNTERMINATED_QSTRING,
		MISC,

		LF = '\n',
		EQUALS = '=',
		COMMA = ',',
		PLUS = '+',
		SLASH = '/',
		OPEN_BRACKET = '[',
		CLOSE_BRACKET = ']',
		UNDERSCORE = '_',
		/* No explicit value: END is UNDERSCORE + 1, i.e. '_' + 1. */
		END
	};

	/** Kind of this token; END for a default-constructed token. */
	token_type type;

	/** Text carried by this token; empty for a default-constructed token. */
	std::string value;
};
53 
54 /** Abstract baseclass for the tokenizer. */
55 class tokenizer
56 {
57 public:
58 	tokenizer(std::istream& in);
59 	~tokenizer();
60 
61 	const token &next_token();
62 
current_token() const63 	const token &current_token() const
64 	{
65 		return token_;
66 	}
67 
68 #ifdef DEBUG_TOKENIZER
previous_token() const69 	const token &previous_token() const
70 	{
71 		return previous_token_;
72 	}
73 #endif
74 
textdomain() const75 	const std::string &textdomain() const
76 	{
77 		return textdomain_;
78 	}
79 
get_file() const80 	const std::string &get_file() const
81 	{
82 		return file_;
83 	}
84 
get_start_line() const85 	int get_start_line() const
86 	{
87 		return startlineno_;
88 	}
89 
90 private:
91 	tokenizer();
92 	int current_;
93 	int lineno_;
94 	int startlineno_;
95 
next_char()96 	void next_char()
97 	{
98 		if (current_ == '\n')
99 			++lineno_;
100 		next_char_fast();
101 	}
102 
next_char_fast()103 	void next_char_fast()
104 	{
105 		do {
106 			current_ = in_.get();
107 		} while (current_ == '\r');
108 #if 0
109 			/// @todo disabled until the campaign server is fixed
110 			if(in_.good()) {
111 				current_ = in_.get();
112 				if (current_ == '\r')
113 				{
114 					// we assume that there is only one '\r'
115 					if(in_.good()) {
116 						current_ = in_.get();
117 					} else {
118 						current_ = EOF;
119 					}
120 				}
121 			} else {
122 				current_ = EOF;
123 			}
124 #endif
125 	}
126 
peek_char()127 	int peek_char()
128 	{
129 		return in_.peek();
130 	}
131 
132 	enum
133 	{
134 		TOK_NONE = 0,
135 		TOK_SPACE = 1,
136 		TOK_NUMERIC = 2,
137 		TOK_ALPHA = 4
138 	};
139 
char_type(unsigned c) const140 	int char_type(unsigned c) const
141 	{
142 		return c < 128 ? char_types_[c] : 0;
143 	}
144 
is_space(int c) const145 	bool is_space(int c) const
146 	{
147 		return (char_type(c) & TOK_SPACE) == TOK_SPACE;
148 	}
149 
is_num(int c) const150 	bool is_num(int c) const
151 	{
152 		return (char_type(c) & TOK_NUMERIC) == TOK_NUMERIC;
153 	}
154 
is_alnum(int c) const155 	bool is_alnum(int c) const
156 	{
157 		return (char_type(c) & (TOK_ALPHA | TOK_NUMERIC)) != TOK_NONE;
158 	}
159 
160 	void skip_comment();
161 
162 	/**
163 	 * Returns true if the next characters are the one from @a cmd
164 	 * followed by a space. Skips all the matching characters.
165 	 */
166 	bool skip_command(char const *cmd);
167 
168 	std::string textdomain_;
169 	std::string file_;
170 	token token_;
171 #ifdef DEBUG_TOKENIZER
172 	token previous_token_;
173 #endif
174 	buffered_istream in_;
175 	char char_types_[128];
176 };
177