1 /*
2 Copyright (C) 2009 Facundo Domínguez
3
4 This file is part of Spacejunk.
5
6 Spacejunk is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 Spacejunk is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Spacejunk. If not, see <http://www.gnu.org/licenses/>.
18 */
19
#include "parsercombinators.h"
#include <sstream>
#include <iomanip>
#include <iostream>
#include <assert.h>
#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "debugmsg.h"
29
30 using namespace std;
31
32
Parser(Tokenizer * tok)33 Parser::Parser(Tokenizer * tok): tok(tok) {
34 errorflag=false;
35 st.tokCount=0;
36 eofCount=0;
37 st.expected=NULL;
38 getNextToken();
39 };
40
~Parser()41 Parser::~Parser() {
42 clear();
43 }
44
operator =(Tokenizer * tok)45 Parser & Parser::operator = (Tokenizer * tok) {
46 clear();
47 this->tok=tok;
48 st.tokCount=0;
49 eofCount=0;
50 getNextToken();
51 return *this;
52 }
53
clear()54 void Parser::clear() {
55 errorflag=false;
56 if (st.expected) {
57 delete st.expected;
58 st.expected=NULL;
59 }
60 while (!keep_tokens.empty()) popState();
61 while (!recorded_tokens.empty()) {
62 delete recorded_tokens.front();
63 recorded_tokens.pop_front();
64 }
65 };
66
setError()67 void Parser::setError() {
68 errorflag=true;
69 }
clearError()70 void Parser::clearError() {
71 errorflag=false;
72 };
error()73 bool Parser::error() {
74 return errorflag || (recorded_tokens.empty() && tok->error());
75 }
errorMessage()76 std::string Parser::errorMessage() {
77 ostringstream temp;
78 temp<<"line "<<tokst.line<<", column "<<tokst.column<<": ";
79 if (st.expected) {
80 list<basic_string<wchar_t> >::iterator i=st.expected->begin();
81 if (!st.expected->empty()) {
82 temp<<"expecting "<<wstos(*i++);
83 if (i!=st.expected->end()) {
84 list<basic_string<wchar_t> >::iterator end=--st.expected->end();
85 for (;i!=end;i++) temp<<", "<<wstos(*i);
86 temp<<" or "<<wstos(*i);
87 }
88 temp<<" but found: "<<wstos(tokst.text);
89 } else temp<<"unexpected: "<<wstos(tokst.text);
90 } else temp<<"unexpected: "<<wstos(tokst.text);
91 for (std::list<std::list<Tokenizer::TokenST> *>::iterator i=recorded_tokens.begin();
92 i!=recorded_tokens.end();i++)
93 for (std::list<Tokenizer::TokenST>::iterator j=(*i)->begin();j!=(*i)->end();j++)
94 temp<<" "<<wstos(j->text);
95 return temp.str();
96 };
getNextToken()97 void Parser::getNextToken() {
98 if (!keep_tokens.empty())
99 keep_tokens.front().first->push_back(tokst);
100 if (st.expected) st.expected->clear();
101 if (recorded_tokens.empty()) {
102 if (tok->eof()) {
103 if (eofCount==st.tokCount) {
104 setError();
105 return;
106 }
107 } else if (!tok->getNext(&tokst)) {
108 setError();
109 return;
110 }
111 } else {
112 tokst=recorded_tokens.front()->front();
113 recorded_tokens.front()->pop_front();
114 if (recorded_tokens.front()->empty()) {
115 delete recorded_tokens.front();
116 recorded_tokens.pop_front();
117 }
118 }
119 st.tokCount++;
120 if (!eofCount && tok->eof()) eofCount=st.tokCount+1;
121 };
pushState()122 void Parser::pushState() {
123 keep_tokens.push_front(make_pair(new list<Tokenizer::TokenST>(),st));
124 if (st.expected) st.expected=new list<std::basic_string<wchar_t> >(*st.expected);
125 }
restoreState()126 void Parser::restoreState() {
127 assert(!keep_tokens.empty());
128 if (keep_tokens.front().first->empty()) {
129 delete keep_tokens.front().first;
130 if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
131 keep_tokens.pop_front();
132 } else {
133 recorded_tokens.push_front(keep_tokens.front().first);
134 ParserST st=keep_tokens.front().second;
135 getNextToken();
136 keep_tokens.pop_front();
137
138 if (this->st.expected) delete this->st.expected;
139 this->st=st;
140 }
141 }
popState()142 void Parser::popState() {
143 assert(!keep_tokens.empty());
144 if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
145 KeptTokens * l=keep_tokens.front().first;
146 keep_tokens.pop_front();
147 if (!keep_tokens.empty()) keep_tokens.front().first->splice(keep_tokens.front().first->end(),*l);
148 delete l;
149 }
expecting(const std::basic_string<wchar_t> & desc)150 Parser & Parser::expecting(const std::basic_string<wchar_t> & desc) {
151 if (!st.expected) st.expected=new std::list<std::basic_string<wchar_t> >();
152 st.expected->push_back(desc);
153 return *this;
154 };
expecting(const char * desc)155 Parser & Parser::expecting(const char * desc) {
156 return expecting(stows(desc));
157 };
158
159
readAnyToken(int * code,std::basic_string<wchar_t> * text)160 Parser & Parser::readAnyToken(int* code,std::basic_string<wchar_t> * text) {
161 if (error())
162 return *this;
163 if (code)
164 *code = tokst.code;
165 if (text) *text=tokst.text;
166 getNextToken();
167 return *this;
168 }
169
readToken(int code,std::basic_string<wchar_t> * text)170 Parser & Parser::readToken(int code,std::basic_string<wchar_t> * text) {
171 if (tokst.code!=code) {
172 setError();
173 return *this;
174 }
175 return readAnyToken(NULL,text);
176 };
177
readToken(int code,std::string * text)178 Parser & Parser::readToken(int code,std::string * text) {
179 basic_string<wchar_t> t;
180 readToken(code,&t);
181 if (text) *text=wstos(t);
182 return *this;
183 };
184
reachedEof()185 inline bool Parser::reachedEof() {
186 return eofCount<=st.tokCount && tok->eof();
187 }
188
eof()189 Parser & Parser::eof() {
190 if (!reachedEof()) setError();
191 return *this;
192 };
193
readAny(wchar_t * c)194 Parser & Parser::readAny(wchar_t * c) {
195 if (tokst.text.length()!=1) {
196 setError();
197 return *this;
198 }
199 if (c) *c=tokst.text[0];
200 getNextToken();
201 return *this;
202 };
203
readChar(wchar_t c)204 Parser & Parser::readChar(wchar_t c) {
205 if (tokst.text.length()==1 && tokst.text[0]==c)
206 getNextToken();
207 else setError();
208 return *this;
209 };
210
readstring(std::basic_string<wchar_t> seq)211 Parser & Parser::readstring(std::basic_string<wchar_t> seq) {
212 bool eq=true;
213 int pos=0;
214 while (!error() && int(seq.length())>pos && int(seq.length())-pos>=int(tokst.text.length())
215 && (eq=!seq.substr(pos,tokst.text.length()).compare(0,tokst.text.length(),tokst.text))) {
216 pos+=tokst.text.length();
217 getNextToken();
218 }
219 if (!eq || int(seq.length())>pos) {
220 //tokst.text=seq.substr(0,pos)+tokst.text;
221 setError();
222 }
223 return *this;
224 };
225
readstring(const char * seq)226 Parser & Parser::readstring(const char * seq) {
227 return readstring(stows(seq));
228 }
229
230
ConfigTokenizer(CharTokenizer * ctok)231 ConfigTokenizer::ConfigTokenizer(CharTokenizer *ctok): l(ctok) {
232 };
233
// WSNEOLN: combinator expression built from MANY / NOTFOLLOWBY / CHAR /
// CHARCHECK, which are defined outside this file. Presumably it skips
// whitespace while stopping in front of a newline -- TODO confirm against
// the macro definitions.
234 #define WSNEOLN MANY(NOTFOLLOWBY(CHAR('\n'))CHARCHECK(iswspace))
// ANYCHAR: applies readAny(NULL) through the PO macro, i.e. accepts the
// current character without storing it.
235 #define ANYCHAR PO(readAny(NULL))
236
// Produces the next token of the configuration input into *tok.
// Returns false immediately when the lexer is already at end of input;
// otherwise returns true unless the lexer ended up in an error state.
// tok->line / tok->column are taken from the lexer state before the token
// is read. The token kinds assigned below are:
//   '<' '>' '/' '=' '?'  -> TOKUNKNOWN with the single character as text
//   '"'                  -> TOKATTTEXT, text filled in by LATTTEXT
//   whitespace           -> TOKUNKNOWN with text " "
//   anything else        -> TOKID, text filled in by ID
// NOTE(review): PARSEbegin/MANY*/TRY/S/WS/... are combinator macros defined
// outside this file; their exact semantics are assumed from their names.
getNext(TokenST * tok)237 bool ConfigTokenizer::getNext(TokenST*tok) {
238 if (l.reachedEof()) return false;
239 wchar_t c;
240 tok->code=TOKUNKNOWN;
241 tok->line=l.st.line;
242 tok->column=l.st.column;
243 // Discard comments: the block below looks for "<!--" ... "-->" sequences (the original note spoke of lines starting with '#')
244 PARSEbegin(Lexer,l)
245 MANYbegin;
246 TRY(S("<!--"));
247 MANYbegin;
248 TRY(NOTFOLLOWBY(S("-->")))ANYCHAR;
249 MANY(NOTONEOF("-"));
250 MANYend;
251 S("-->");
252 WS;
253 MANYend;
254
// Re-initialise the token so that it describes the position after any
// skipped comments.
255 tok->code=TOKUNKNOWN;
256 tok->line=l.st.line;
257 tok->column=l.st.column;
258 tok->text=L"";
259 switch (l.getCurrentChar()) {
260 case L'<' :
261 case L'>':
262 PO(readAny(&c));
263 tok->text=c;
// Only the angle brackets are followed by WS.
264 WS;
265 break;
266 case L'/' :
267 case L'=':
268 case L'?':
269 PO(readAny(&c));
270 tok->text=c;
271 break;
272 case L'\"':
273 LATTTEXT(&tok->text);
274 tok->code=TOKATTTEXT;
275 break;
276 default:
277 if (iswspace(l.getCurrentChar())) {
// A whitespace run is reported as one token whose text is a single space.
278 WS1;
279 tok->text=L" ";
280 } else {
281 tok->code=TOKID;
282 ID(&tok->text);
283 }
284 }
285 ENDBLOCK;
286 PARSEend;
287 return !l.error();
288 };
eof()289 bool ConfigTokenizer::eof() {
290 return l.reachedEof();
291 }
error()292 bool ConfigTokenizer::error() {
293 return l.error();
294 }
295
296
Lexer(CharTokenizer * tok)297 Lexer::Lexer(CharTokenizer * tok): tok(tok) {
298 errorflag=false;
299 st.expected=NULL;
300 st.line=1;
301 st.column=0;
302 st.charCount=0;
303 eofCount=0;
304 if (tok->eof()) eofCount=st.charCount+1;
305 getNextToken();
306 };
307
~Lexer()308 Lexer::~Lexer() {
309 clear();
310 }
311
operator =(CharTokenizer * tok)312 Lexer & Lexer::operator = (CharTokenizer * tok) {
313 clear();
314 this->tok=tok;
315 st.line=0;
316 st.column=1;
317 st.charCount=0;
318 eofCount=0;
319 if (tok->eof()) eofCount=st.charCount+1;
320 getNextToken();
321 return *this;
322 }
323
clear()324 void Lexer::clear() {
325 errorflag=false;
326 while (!keep_tokens.empty()) popState();
327 while (!recorded_tokens.empty()) {
328 recorded_tokens.pop_front();
329 }
330 if (st.expected) {
331 delete st.expected;
332 st.expected=NULL;
333 }
334 };
335
setError()336 void Lexer::setError() {
337 errorflag=true;
338 }
clearError()339 void Lexer::clearError() {
340 errorflag=false;
341 };
error()342 bool Lexer::error() {
343 return errorflag || (recorded_tokens.empty() && tok->error() && !(tok->eof() && st.charCount<eofCount));
344 }
getNextToken()345 wchar_t Lexer::getNextToken() {
346 if (st.expected) st.expected->clear();
347 if (!keep_tokens.empty())
348 keep_tokens.front().first+=currentChar;
349 if (recorded_tokens.empty())
350 if (tok->eof()) {
351 if (eofCount==st.charCount) {
352 setError();
353 currentChar=btowc('\0');
354 return currentChar;
355 }
356 } else currentChar=tok->readNext();
357 else {
358 currentChar=recorded_tokens.front()[0];
359 recorded_tokens.front().erase(0,1);
360 if (recorded_tokens.front().length()==0) {
361 recorded_tokens.pop_front();
362 }
363 };
364 st.charCount++;
365 if (!eofCount && tok->eof()) eofCount=st.charCount+1;
366 if (!tok->eof()) {
367 if (currentChar==L'\n') {
368 st.line++;
369 st.column=0;
370 } else if (currentChar!=L'\r') st.column++;
371 }
372 return currentChar;
373 };
pushState()374 void Lexer::pushState() {
375 keep_tokens.push_front(make_pair(L"",st));
376 if (st.expected) st.expected=new list<std::basic_string<wchar_t> >(*st.expected);
377 }
restoreState()378 void Lexer::restoreState() {
379 assert(!keep_tokens.empty());
380 if (keep_tokens.front().first.length()==0) {
381 if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
382 keep_tokens.pop_front();
383 } else {
384 recorded_tokens.push_front(keep_tokens.front().first);
385 if (!reachedEof()) recorded_tokens.front()+=currentChar;
386 LexerST st=keep_tokens.front().second;
387 keep_tokens.pop_front();
388 getNextToken();
389 if (this->st.expected) delete this->st.expected;
390 this->st=st;
391 }
392 }
popState()393 void Lexer::popState() {
394 assert(!keep_tokens.empty());
395 if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
396 std::basic_string<wchar_t> s=keep_tokens.front().first;
397 keep_tokens.pop_front();
398 if (!keep_tokens.empty()) keep_tokens.front().first+=s;
399 }
readChar(wchar_t c)400 Lexer & Lexer::readChar(wchar_t c) {
401 if (currentChar!=c || reachedEof()) {
402 setError();
403 return *this;
404 }
405 getNextToken();
406 return *this;
407 };
408
reachedEof()409 inline bool Lexer::reachedEof() {
410 return eofCount<=st.charCount && tok->eof();
411 };
412
readAny(wchar_t * c)413 Lexer & Lexer::readAny(wchar_t * c) {
414 if (c) *c=currentChar;
415 getNextToken();
416 return *this;
417 };
418
eof()419 Lexer & Lexer::eof() {
420 if (!reachedEof()) setError();
421 return *this;
422 };
423
oneOf(const wchar_t * cs)424 Lexer & Lexer::oneOf(const wchar_t * cs) {
425 for (;*cs!=L'\0' && !error();cs++) if (currentChar==*cs) {
426 getNextToken();
427 return *this;
428 }
429 setError();
430 return *this;
431 };
432
oneOf(const char * cs)433 Lexer & Lexer::oneOf(const char * cs) {
434 for (;*cs!='\0' && !error();cs++) if (wctob(currentChar)==*cs) {
435 getNextToken();
436 return *this;
437 }
438 setError();
439 return *this;
440 };
441
notOneOf(const wchar_t * cs)442 Lexer & Lexer::notOneOf(const wchar_t * cs) {
443 for (;*cs!=L'\0';cs++) if (currentChar==*cs) {
444 setError();
445 return *this;
446 }
447 getNextToken();
448 return *this;
449 };
450
notOneOf(const char * cs)451 Lexer & Lexer::notOneOf(const char * cs) {
452 for (;*cs!='\0';cs++) if (wctob(currentChar)==*cs) {
453 setError();
454 return *this;
455 }
456 getNextToken();
457 return *this;
458 };
459
readstring(const wchar_t * cs)460 Lexer & Lexer::readstring(const wchar_t * cs) {
461 for (;*cs!='\0' && !error() && currentChar==*cs ;cs++)
462 getNextToken();
463 if (*cs!='\0')
464 setError();
465 return *this;
466 };
467
readstring(const char * cs)468 Lexer & Lexer::readstring(const char * cs) {
469 return readstring(stows(cs).c_str());
470 }
471
472
errorMessage()473 std::string Lexer::errorMessage() {
474 ostringstream temp;
475 string desc;
476 if (reachedEof()) desc="end of input";
477 else if (tok->error()) desc="read error";
478 else desc=currentChar;
479 temp<<"line "<<st.line<<", column "<<st.column<<": ";
480 if (st.expected) {
481 list<basic_string<wchar_t> >::iterator i=st.expected->begin();
482 if (!st.expected->empty()) {
483 temp<<"expecting "<<wstos(*i++);
484 if (i!=st.expected->end()) {
485 list<basic_string<wchar_t> >::iterator end=--st.expected->end();
486 for (;i!=end;i++) temp<<", "<<wstos(*i);
487 temp<<" or "<<wstos(*i);
488 }
489 temp<<" but found: "<<desc;
490 } else temp<<"unexpected: "<<desc;
491 } else temp<<"unexpected: "<<desc;
492 for (list<basic_string<wchar_t> >::iterator i=recorded_tokens.begin();i!=recorded_tokens.end();i++)
493 temp<<wstos(*i);
494 return temp.str();
495 };
496
readFloat(double * d)497 Lexer & Lexer::readFloat(double * d) {
498 basic_string<wchar_t> str;
499 PARSEbegin(Lexer,*this);
500 INPUT(OPT(CHAR('+')OR CHAR('-')OR POK)MANY1(CHARCHECK(isdigit))
501 OPT(CHAR('.')MANY1(CHARCHECK(isdigit)) OR POK)
502 OPT(CHAR('e')OPT(CHAR('+')OR CHAR('-')OR POK)MANY1(CHARCHECK(isdigit)) OR POK)
503 ,&str);
504 *d=wcstod(str.c_str(),NULL);
505 if (isnan(*d)) PERROR;
506 PARSEend;
507 return *this;
508 }
509
readInt(int * d)510 Lexer & Lexer::readInt(int * d) {
511 basic_string<wchar_t> str;
512 PARSEbegin(Lexer,*this);
513 INPUT(MANY1(CHARCHECK(iswdigit)),&str);
514 const wchar_t * p=str.c_str();
515 wchar_t * tail;
516 *d=wcstol(p,&tail,10);
517 if (*tail!=L'\0') PERROR;
518 PARSEend;
519 return *this;
520 }
521
expecting(const std::basic_string<wchar_t> & desc)522 Lexer & Lexer::expecting(const std::basic_string<wchar_t> & desc) {
523 if (!st.expected) st.expected=new std::list<std::basic_string<wchar_t> >();
524 st.expected->push_back(desc);
525 return *this;
526 };
527
expecting(const char * desc)528 Lexer & Lexer::expecting(const char * desc) {
529 return expecting(stows(desc));
530 };
531
getString()532 std::basic_string<wchar_t> Lexer::getString() {
533 if (keep_tokens.empty()) return L"";
534 else return keep_tokens.front().first;
535 };
536
537
538 class PHYSFSFiller : public Filler {
539 private:
540 FILE * f;
541 public:
PHYSFSFiller(FILE * f)542 PHYSFSFiller(FILE *f) : f(f) {}
fill(char * buffer,size_t atmost)543 size_t fill(char * buffer,size_t atmost) {
544 size_t res=fread(buffer,1,atmost,f);
545 if (res<0) return 0;
546 else return res;
547 };
548 };
549
StreamTokenizer(std::istream & i)550 StreamTokenizer::StreamTokenizer(std::istream & i)
551 : f(new STDStreamFiller(i)),sc(f) {};
552
StreamTokenizer(FILE * i)553 StreamTokenizer::StreamTokenizer(FILE * i)
554 : f(new PHYSFSFiller(i)),sc(f) {};
555
~StreamTokenizer()556 StreamTokenizer::~StreamTokenizer() {
557 delete f;
558 }
559
readNext()560 wchar_t StreamTokenizer::readNext() {
561 wchar_t c;
562 if (!sc.read(&c,1)) {
563 CHERROR<<"StreamTokenizer: wchar_t is not big enough!"<<ENDL;
564 exit(1);
565 };
566 return c;
567 };
568
eof()569 bool StreamTokenizer::eof() {
570 return sc.eof();
571 };
572
error()573 bool StreamTokenizer::error() {
574 return sc.error();
575 };
576