1 // license:GPL-2.0+
2 // copyright-holders:Couriersud
3 
4 #include "palloc.h"
5 #include "pstonum.h"
6 #include "pstrutil.h"
7 #include "ptokenizer.h"
8 
9 namespace plib {
10 
11 	PERRMSGV(MF_EXPECTED_TOKEN_1_GOT_2,         2, "Expected token <{1}>, got <{2}>")
12 	PERRMSGV(MF_EXPECTED_STRING_GOT_1,          1, "Expected a string, got <{1}>")
13 	PERRMSGV(MF_EXPECTED_IDENTIFIER_GOT_1,      1, "Expected an identifier, got <{1}>")
14 	PERRMSGV(MF_EXPECTED_ID_OR_NUM_GOT_1,       1, "Expected an identifier or number, got <{1}>")
15 	PERRMSGV(MF_EXPECTED_NUMBER_GOT_1,          1, "Expected a number, got <{1}>")
16 	PERRMSGV(MF_EXPECTED_LONGINT_GOT_1,         1, "Expected a logn int, got <{1}>")
17 	PERRMSGV(MF_EXPECTED_LINENUM_GOT_1,         1, "Expected line number after line marker but got <{1}>")
18 	PERRMSGV(MF_EXPECTED_FILENAME_GOT_1,        1, "Expected file name after line marker but got <{1}>")
19 
20 	// ----------------------------------------------------------------------------------------
21 	// A simple tokenizer
22 	// ----------------------------------------------------------------------------------------
23 
skipeol()24 	void ptokenizer::skipeol()
25 	{
26 		pstring::value_type c = getc();
27 		while (c != 0)
28 		{
29 			if (c == 10)
30 			{
31 				c = getc();
32 				if (c != 13)
33 					ungetc(c);
34 				return;
35 			}
36 			c = getc();
37 		}
38 	}
39 
getc()40 	pstring::value_type ptokenizer::getc()
41 	{
42 		if (m_unget != 0)
43 		{
44 			pstring::value_type c = m_unget;
45 			m_unget = 0;
46 			return c;
47 		}
48 		if (m_px == m_cur_line.end())
49 		{
50 			//++m_source_location.back();
51 			putf8string line;
52 			if (m_strm->readline_lf(line))
53 			{
54 				m_cur_line = pstring(line);
55 				m_px = m_cur_line.begin();
56 				if (*m_px != '#')
57 					m_token_queue->push_back(token_t(token_type::SOURCELINE, m_cur_line));
58 			}
59 			else
60 				return 0;
61 		}
62 		pstring::value_type c = *(m_px++);
63 		return c;
64 	}
65 
ungetc(pstring::value_type c)66 	void ptokenizer::ungetc(pstring::value_type c)
67 	{
68 		m_unget = c;
69 	}
70 
require_token(const token_id_t & token_num)71 	void ptoken_reader::require_token(const token_id_t &token_num)
72 	{
73 		require_token(get_token(), token_num);
74 	}
require_token(const token_t & tok,const token_id_t & token_num)75 	void ptoken_reader::require_token(const token_t &tok, const token_id_t &token_num)
76 	{
77 		if (!tok.is(token_num))
78 		{
79 			error(MF_EXPECTED_TOKEN_1_GOT_2(token_num.name(), tok.str()));
80 		}
81 	}
82 
get_string()83 	pstring ptoken_reader::get_string()
84 	{
85 		token_t tok = get_token();
86 		if (!tok.is_type(token_type::STRING))
87 		{
88 			error(MF_EXPECTED_STRING_GOT_1(tok.str()));
89 		}
90 		return tok.str();
91 	}
92 
93 
get_identifier()94 	pstring ptoken_reader::get_identifier()
95 	{
96 		token_t tok = get_token();
97 		if (!tok.is_type(token_type::IDENTIFIER))
98 		{
99 			error(MF_EXPECTED_IDENTIFIER_GOT_1(tok.str()));
100 		}
101 		return tok.str();
102 	}
103 
get_identifier_or_number()104 	pstring ptoken_reader::get_identifier_or_number()
105 	{
106 		token_t tok = get_token();
107 		if (!(tok.is_type(token_type::IDENTIFIER) || tok.is_type(token_type::NUMBER)))
108 		{
109 			error(MF_EXPECTED_ID_OR_NUM_GOT_1(tok.str()));
110 		}
111 		return tok.str();
112 	}
113 
114 	// FIXME: combine into template
get_number_double()115 	double ptoken_reader::get_number_double()
116 	{
117 		token_t tok = get_token();
118 		if (!tok.is_type(token_type::NUMBER))
119 		{
120 			error(MF_EXPECTED_NUMBER_GOT_1(tok.str()));
121 		}
122 		bool err(false);
123 		auto ret = plib::pstonum_ne<double>(tok.str(), err);
124 		if (err)
125 			error(MF_EXPECTED_NUMBER_GOT_1(tok.str()));
126 		return ret;
127 	}
128 
get_number_long()129 	long ptoken_reader::get_number_long()
130 	{
131 		token_t tok = get_token();
132 		if (!tok.is_type(token_type::NUMBER))
133 		{
134 			error(MF_EXPECTED_LONGINT_GOT_1(tok.str()) );
135 		}
136 		bool err(false);
137 		auto ret = plib::pstonum_ne<long>(tok.str(), err);
138 		if (err)
139 			error(MF_EXPECTED_LONGINT_GOT_1(tok.str()) );
140 		return ret;
141 	}
142 
process_line_token(const token_t & tok)143 	bool ptoken_reader::process_line_token(const token_t &tok)
144 	{
145 		if (tok.is_type(token_type::LINEMARKER))
146 		{
147 			bool benter(false);
148 			bool bexit(false);
149 			pstring file;
150 			unsigned lineno(0);
151 
152 			auto sp = psplit(tok.str(), ' ');
153 			//printf("%d %s\n", (int) sp.size(), ret.str().c_str());
154 
155 			bool err = false;
156 			lineno = pstonum_ne<unsigned>(sp[1], err);
157 			if (err)
158 				error(MF_EXPECTED_LINENUM_GOT_1(tok.str()));
159 			if (sp[2].substr(0,1) != "\"")
160 				error(MF_EXPECTED_FILENAME_GOT_1(tok.str()));
161 			file = sp[2].substr(1, sp[2].length() - 2);
162 
163 			for (std::size_t i = 3; i < sp.size(); i++)
164 			{
165 				if (sp[i] == "1")
166 					benter = true;
167 				if (sp[i] == "2")
168 					bexit = true;
169 				// FIXME: process flags; actually only 1 (file enter) and 2 (after file exit)
170 			}
171 			if (bexit) // pop the last location
172 				m_source_location.pop_back();
173 			if (!benter) // new location!
174 				m_source_location.pop_back();
175 			m_source_location.emplace_back(plib::source_location(file, lineno));
176 			return true;
177 		}
178 
179 		if (tok.is_type(token_type::SOURCELINE))
180 		{
181 			m_line = tok.str();
182 			++m_source_location.back();
183 			return true;
184 		}
185 
186 		return false;
187 	}
188 
get_token()189 	ptoken_reader::token_t ptoken_reader::get_token()
190 	{
191 		token_t ret = get_token_queue();
192 		while (true)
193 		{
194 			if (ret.is_type(token_type::token_type::ENDOFFILE))
195 				return ret;
196 
197 			//printf("%s\n", ret.str().c_str());
198 			if (process_line_token(ret))
199 			{
200 				ret = get_token_queue();
201 			}
202 			else
203 			{
204 				return ret;
205 			}
206 		}
207 	}
208 
get_token_raw()209 	ptoken_reader::token_t ptoken_reader::get_token_raw()
210 	{
211 		token_t ret = get_token_queue();
212 		process_line_token(ret);
213 		return ret;
214 	}
215 
get_token_internal()216 	ptoken_reader::token_t ptokenizer::get_token_internal()
217 	{
218 		// skip ws
219 		pstring::value_type c = getc();
220 		while (m_whitespace.find(c) != pstring::npos)
221 		{
222 			c = getc();
223 			if (eof())
224 			{
225 				return token_t(token_type::ENDOFFILE);
226 			}
227 		}
228 		if (m_support_line_markers && c == '#')
229 		{
230 			pstring lm("#");
231 			do
232 			{
233 				c = getc();
234 				if (eof())
235 					return token_t(token_type::ENDOFFILE);
236 				if (c == '\r' || c == '\n')
237 					return token_t(token_type::LINEMARKER, lm);
238 				lm += c;
239 			} while (true);
240 		}
241 		if (m_number_chars_start.find(c) != pstring::npos)
242 		{
243 			// read number while we receive number or identifier chars
244 			// treat it as an identifier when there are identifier chars in it
245 			token_type ret = token_type::NUMBER;
246 			pstring tokstr = "";
247 			while (true) {
248 				if (m_identifier_chars.find(c) != pstring::npos && m_number_chars.find(c) == pstring::npos)
249 					ret = token_type::IDENTIFIER;
250 				else if (m_number_chars.find(c) == pstring::npos)
251 					break;
252 				tokstr += c;
253 				c = getc();
254 			}
255 			ungetc(c);
256 			return token_t(ret, tokstr);
257 		}
258 
259 		// not a number, try identifier
260 		if (m_identifier_chars.find(c) != pstring::npos)
261 		{
262 			// read identifier till non identifier char
263 			pstring tokstr = "";
264 			while (m_identifier_chars.find(c) != pstring::npos)
265 			{
266 				tokstr += c;
267 				c = getc();
268 			}
269 			ungetc(c);
270 			auto id = m_tokens.find(tokstr);
271 			return (id != m_tokens.end()) ?
272 					token_t(id->second, tokstr)
273 				:   token_t(token_type::IDENTIFIER, tokstr);
274 		}
275 
276 		if (c == m_string)
277 		{
278 			pstring tokstr = "";
279 			c = getc();
280 			while (c != m_string)
281 			{
282 				tokstr += c;
283 				c = getc();
284 			}
285 			return token_t(token_type::STRING, tokstr);
286 		}
287 		else
288 		{
289 			// read identifier till first identifier char or ws
290 			pstring tokstr = "";
291 			while ((m_identifier_chars.find(c) == pstring::npos) && (m_whitespace.find(c) == pstring::npos))
292 			{
293 				tokstr += c;
294 				// expensive, check for single char tokens
295 				if (tokstr.length() == 1)
296 				{
297 					auto id = m_tokens.find(tokstr);
298 					if (id != m_tokens.end())
299 						return token_t(id->second, tokstr);
300 				}
301 				c = getc();
302 			}
303 			ungetc(c);
304 			auto id = m_tokens.find(tokstr);
305 			return (id != m_tokens.end()) ?
306 					token_t(id->second, tokstr)
307 				:   token_t(token_type::UNKNOWN, tokstr);
308 		}
309 	}
310 
get_token_comment()311 	ptoken_reader::token_t ptokenizer::get_token_comment()
312 	{
313 		token_t ret = get_token_internal();
314 		while (true)
315 		{
316 			if (ret.is_type(token_type::token_type::ENDOFFILE))
317 				return ret;
318 
319 			if (ret.is(m_tok_comment_start))
320 			{
321 				do {
322 					ret = get_token_internal();
323 				} while (ret.is_not(m_tok_comment_end));
324 				ret = get_token_internal();
325 			}
326 			else if (ret.is(m_tok_line_comment))
327 			{
328 				skipeol();
329 				ret = get_token_internal();
330 			}
331 			else
332 			{
333 				return ret;
334 			}
335 		}
336 	}
337 
338 
error(const perrmsg & errs)339 	void ptoken_reader::error(const perrmsg &errs)
340 	{
341 		pstring s("");
342 		pstring trail      ("                 from ");
343 		pstring trail_first("In file included from ");
344 		pstring e = plib::pfmt("{1}:{2}:0: error: {3}\n")
345 				(m_source_location.back().file_name(), m_source_location.back().line(), errs());
346 		m_source_location.pop_back();
347 		while (!m_source_location.empty())
348 		{
349 			if (m_source_location.size() == 1)
350 				trail = trail_first;
351 			s = plib::pfmt("{1}{2}:{3}:0\n{4}")(trail, m_source_location.back().file_name(), m_source_location.back().line(), s);
352 			m_source_location.pop_back();
353 		}
354 		verror("\n" + s + e + " " + m_line + "\n");
355 	}
356 
357 } // namespace plib
358