1 // license:GPL-2.0+ 2 // copyright-holders:Couriersud 3 4 #include "palloc.h" 5 #include "pstonum.h" 6 #include "pstrutil.h" 7 #include "ptokenizer.h" 8 9 namespace plib { 10 11 PERRMSGV(MF_EXPECTED_TOKEN_1_GOT_2, 2, "Expected token <{1}>, got <{2}>") 12 PERRMSGV(MF_EXPECTED_STRING_GOT_1, 1, "Expected a string, got <{1}>") 13 PERRMSGV(MF_EXPECTED_IDENTIFIER_GOT_1, 1, "Expected an identifier, got <{1}>") 14 PERRMSGV(MF_EXPECTED_ID_OR_NUM_GOT_1, 1, "Expected an identifier or number, got <{1}>") 15 PERRMSGV(MF_EXPECTED_NUMBER_GOT_1, 1, "Expected a number, got <{1}>") 16 PERRMSGV(MF_EXPECTED_LONGINT_GOT_1, 1, "Expected a logn int, got <{1}>") 17 PERRMSGV(MF_EXPECTED_LINENUM_GOT_1, 1, "Expected line number after line marker but got <{1}>") 18 PERRMSGV(MF_EXPECTED_FILENAME_GOT_1, 1, "Expected file name after line marker but got <{1}>") 19 20 // ---------------------------------------------------------------------------------------- 21 // A simple tokenizer 22 // ---------------------------------------------------------------------------------------- 23 skipeol()24 void ptokenizer::skipeol() 25 { 26 pstring::value_type c = getc(); 27 while (c != 0) 28 { 29 if (c == 10) 30 { 31 c = getc(); 32 if (c != 13) 33 ungetc(c); 34 return; 35 } 36 c = getc(); 37 } 38 } 39 getc()40 pstring::value_type ptokenizer::getc() 41 { 42 if (m_unget != 0) 43 { 44 pstring::value_type c = m_unget; 45 m_unget = 0; 46 return c; 47 } 48 if (m_px == m_cur_line.end()) 49 { 50 //++m_source_location.back(); 51 putf8string line; 52 if (m_strm->readline_lf(line)) 53 { 54 m_cur_line = pstring(line); 55 m_px = m_cur_line.begin(); 56 if (*m_px != '#') 57 m_token_queue->push_back(token_t(token_type::SOURCELINE, m_cur_line)); 58 } 59 else 60 return 0; 61 } 62 pstring::value_type c = *(m_px++); 63 return c; 64 } 65 ungetc(pstring::value_type c)66 void ptokenizer::ungetc(pstring::value_type c) 67 { 68 m_unget = c; 69 } 70 require_token(const token_id_t & token_num)71 void ptoken_reader::require_token(const token_id_t &token_num) 72 { 73 require_token(get_token(), token_num); 74 } require_token(const token_t & tok,const token_id_t & token_num)75 void ptoken_reader::require_token(const token_t &tok, const token_id_t &token_num) 76 { 77 if (!tok.is(token_num)) 78 { 79 error(MF_EXPECTED_TOKEN_1_GOT_2(token_num.name(), tok.str())); 80 } 81 } 82 get_string()83 pstring ptoken_reader::get_string() 84 { 85 token_t tok = get_token(); 86 if (!tok.is_type(token_type::STRING)) 87 { 88 error(MF_EXPECTED_STRING_GOT_1(tok.str())); 89 } 90 return tok.str(); 91 } 92 93 get_identifier()94 pstring ptoken_reader::get_identifier() 95 { 96 token_t tok = get_token(); 97 if (!tok.is_type(token_type::IDENTIFIER)) 98 { 99 error(MF_EXPECTED_IDENTIFIER_GOT_1(tok.str())); 100 } 101 return tok.str(); 102 } 103 get_identifier_or_number()104 pstring ptoken_reader::get_identifier_or_number() 105 { 106 token_t tok = get_token(); 107 if (!(tok.is_type(token_type::IDENTIFIER) || tok.is_type(token_type::NUMBER))) 108 { 109 error(MF_EXPECTED_ID_OR_NUM_GOT_1(tok.str())); 110 } 111 return tok.str(); 112 } 113 114 // FIXME: combine into template get_number_double()115 double ptoken_reader::get_number_double() 116 { 117 token_t tok = get_token(); 118 if (!tok.is_type(token_type::NUMBER)) 119 { 120 error(MF_EXPECTED_NUMBER_GOT_1(tok.str())); 121 } 122 bool err(false); 123 auto ret = plib::pstonum_ne<double>(tok.str(), err); 124 if (err) 125 error(MF_EXPECTED_NUMBER_GOT_1(tok.str())); 126 return ret; 127 } 128 get_number_long()129 long ptoken_reader::get_number_long() 130 { 131 token_t tok = get_token(); 132 if (!tok.is_type(token_type::NUMBER)) 133 { 134 error(MF_EXPECTED_LONGINT_GOT_1(tok.str()) ); 135 } 136 bool err(false); 137 auto ret = plib::pstonum_ne<long>(tok.str(), err); 138 if (err) 139 error(MF_EXPECTED_LONGINT_GOT_1(tok.str()) ); 140 return ret; 141 } 142 process_line_token(const token_t & tok)143 bool ptoken_reader::process_line_token(const token_t &tok) 144 { 145 if (tok.is_type(token_type::LINEMARKER)) 146 { 147 bool benter(false); 148 bool bexit(false); 149 pstring file; 150 unsigned lineno(0); 151 152 auto sp = psplit(tok.str(), ' '); 153 //printf("%d %s\n", (int) sp.size(), ret.str().c_str()); 154 155 bool err = false; 156 lineno = pstonum_ne<unsigned>(sp[1], err); 157 if (err) 158 error(MF_EXPECTED_LINENUM_GOT_1(tok.str())); 159 if (sp[2].substr(0,1) != "\"") 160 error(MF_EXPECTED_FILENAME_GOT_1(tok.str())); 161 file = sp[2].substr(1, sp[2].length() - 2); 162 163 for (std::size_t i = 3; i < sp.size(); i++) 164 { 165 if (sp[i] == "1") 166 benter = true; 167 if (sp[i] == "2") 168 bexit = true; 169 // FIXME: process flags; actually only 1 (file enter) and 2 (after file exit) 170 } 171 if (bexit) // pop the last location 172 m_source_location.pop_back(); 173 if (!benter) // new location! 174 m_source_location.pop_back(); 175 m_source_location.emplace_back(plib::source_location(file, lineno)); 176 return true; 177 } 178 179 if (tok.is_type(token_type::SOURCELINE)) 180 { 181 m_line = tok.str(); 182 ++m_source_location.back(); 183 return true; 184 } 185 186 return false; 187 } 188 get_token()189 ptoken_reader::token_t ptoken_reader::get_token() 190 { 191 token_t ret = get_token_queue(); 192 while (true) 193 { 194 if (ret.is_type(token_type::token_type::ENDOFFILE)) 195 return ret; 196 197 //printf("%s\n", ret.str().c_str()); 198 if (process_line_token(ret)) 199 { 200 ret = get_token_queue(); 201 } 202 else 203 { 204 return ret; 205 } 206 } 207 } 208 get_token_raw()209 ptoken_reader::token_t ptoken_reader::get_token_raw() 210 { 211 token_t ret = get_token_queue(); 212 process_line_token(ret); 213 return ret; 214 } 215 get_token_internal()216 ptoken_reader::token_t ptokenizer::get_token_internal() 217 { 218 // skip ws 219 pstring::value_type c = getc(); 220 while (m_whitespace.find(c) != pstring::npos) 221 { 222 c = getc(); 223 if (eof()) 224 { 225 return token_t(token_type::ENDOFFILE); 226 } 227 } 228 if (m_support_line_markers && c == '#') 229 { 230 pstring lm("#"); 231 do 232 { 233 c = getc(); 234 if (eof()) 235 return token_t(token_type::ENDOFFILE); 236 if (c == '\r' || c == '\n') 237 return token_t(token_type::LINEMARKER, lm); 238 lm += c; 239 } while (true); 240 } 241 if (m_number_chars_start.find(c) != pstring::npos) 242 { 243 // read number while we receive number or identifier chars 244 // treat it as an identifier when there are identifier chars in it 245 token_type ret = token_type::NUMBER; 246 pstring tokstr = ""; 247 while (true) { 248 if (m_identifier_chars.find(c) != pstring::npos && m_number_chars.find(c) == pstring::npos) 249 ret = token_type::IDENTIFIER; 250 else if (m_number_chars.find(c) == pstring::npos) 251 break; 252 tokstr += c; 253 c = getc(); 254 } 255 ungetc(c); 256 return token_t(ret, tokstr); 257 } 258 259 // not a number, try identifier 260 if (m_identifier_chars.find(c) != pstring::npos) 261 { 262 // read identifier till non identifier char 263 pstring tokstr = ""; 264 while (m_identifier_chars.find(c) != pstring::npos) 265 { 266 tokstr += c; 267 c = getc(); 268 } 269 ungetc(c); 270 auto id = m_tokens.find(tokstr); 271 return (id != m_tokens.end()) ? 272 token_t(id->second, tokstr) 273 : token_t(token_type::IDENTIFIER, tokstr); 274 } 275 276 if (c == m_string) 277 { 278 pstring tokstr = ""; 279 c = getc(); 280 while (c != m_string) 281 { 282 tokstr += c; 283 c = getc(); 284 } 285 return token_t(token_type::STRING, tokstr); 286 } 287 else 288 { 289 // read identifier till first identifier char or ws 290 pstring tokstr = ""; 291 while ((m_identifier_chars.find(c) == pstring::npos) && (m_whitespace.find(c) == pstring::npos)) 292 { 293 tokstr += c; 294 // expensive, check for single char tokens 295 if (tokstr.length() == 1) 296 { 297 auto id = m_tokens.find(tokstr); 298 if (id != m_tokens.end()) 299 return token_t(id->second, tokstr); 300 } 301 c = getc(); 302 } 303 ungetc(c); 304 auto id = m_tokens.find(tokstr); 305 return (id != m_tokens.end()) ? 306 token_t(id->second, tokstr) 307 : token_t(token_type::UNKNOWN, tokstr); 308 } 309 } 310 get_token_comment()311 ptoken_reader::token_t ptokenizer::get_token_comment() 312 { 313 token_t ret = get_token_internal(); 314 while (true) 315 { 316 if (ret.is_type(token_type::token_type::ENDOFFILE)) 317 return ret; 318 319 if (ret.is(m_tok_comment_start)) 320 { 321 do { 322 ret = get_token_internal(); 323 } while (ret.is_not(m_tok_comment_end)); 324 ret = get_token_internal(); 325 } 326 else if (ret.is(m_tok_line_comment)) 327 { 328 skipeol(); 329 ret = get_token_internal(); 330 } 331 else 332 { 333 return ret; 334 } 335 } 336 } 337 338 error(const perrmsg & errs)339 void ptoken_reader::error(const perrmsg &errs) 340 { 341 pstring s(""); 342 pstring trail (" from "); 343 pstring trail_first("In file included from "); 344 pstring e = plib::pfmt("{1}:{2}:0: error: {3}\n") 345 (m_source_location.back().file_name(), m_source_location.back().line(), errs()); 346 m_source_location.pop_back(); 347 while (!m_source_location.empty()) 348 { 349 if (m_source_location.size() == 1) 350 trail = trail_first; 351 s = plib::pfmt("{1}{2}:{3}:0\n{4}")(trail, m_source_location.back().file_name(), m_source_location.back().line(), s); 352 m_source_location.pop_back(); 353 } 354 verror("\n" + s + e + " " + m_line + "\n"); 355 } 356 357 } // namespace plib 358