1 /*
2   Copyright (C) 2009 Facundo Domínguez
3 
4   This file is part of Spacejunk.
5 
6   Spacejunk is free software: you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation, either version 3 of the License, or
9   (at your option) any later version.
10 
11   Foobar is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #include "parsercombinators.h"
21 #include <sstream>
22 #include <iomanip>
23 #include <iostream>
24 #include <assert.h>
25 #include <math.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include "debugmsg.h"
29 
30 using namespace std;
31 
32 
Parser(Tokenizer * tok)33 Parser::Parser(Tokenizer * tok): tok(tok) {
34     errorflag=false;
35     st.tokCount=0;
36     eofCount=0;
37     st.expected=NULL;
38     getNextToken();
39 };
40 
~Parser()41 Parser::~Parser() {
42     clear();
43 }
44 
operator =(Tokenizer * tok)45 Parser & Parser::operator = (Tokenizer * tok) {
46     clear();
47     this->tok=tok;
48     st.tokCount=0;
49     eofCount=0;
50     getNextToken();
51     return *this;
52 }
53 
clear()54 void Parser::clear() {
55     errorflag=false;
56     if (st.expected) {
57         delete st.expected;
58         st.expected=NULL;
59     }
60     while (!keep_tokens.empty()) popState();
61     while (!recorded_tokens.empty()) {
62         delete recorded_tokens.front();
63         recorded_tokens.pop_front();
64     }
65 };
66 
setError()67 void Parser::setError() {
68     errorflag=true;
69 }
clearError()70 void Parser::clearError() {
71     errorflag=false;
72 };
error()73 bool Parser::error() {
74     return errorflag || (recorded_tokens.empty() && tok->error());
75 }
errorMessage()76 std::string Parser::errorMessage() {
77     ostringstream temp;
78     temp<<"line "<<tokst.line<<", column "<<tokst.column<<": ";
79     if (st.expected) {
80         list<basic_string<wchar_t> >::iterator i=st.expected->begin();
81         if (!st.expected->empty()) {
82             temp<<"expecting "<<wstos(*i++);
83             if (i!=st.expected->end()) {
84                 list<basic_string<wchar_t> >::iterator end=--st.expected->end();
85                 for (;i!=end;i++) temp<<", "<<wstos(*i);
86                 temp<<" or "<<wstos(*i);
87             }
88             temp<<" but found: "<<wstos(tokst.text);
89         } else temp<<"unexpected: "<<wstos(tokst.text);
90     } else temp<<"unexpected: "<<wstos(tokst.text);
91     for (std::list<std::list<Tokenizer::TokenST> *>::iterator i=recorded_tokens.begin();
92             i!=recorded_tokens.end();i++)
93         for (std::list<Tokenizer::TokenST>::iterator j=(*i)->begin();j!=(*i)->end();j++)
94             temp<<" "<<wstos(j->text);
95     return temp.str();
96 };
// Advances the lookahead token (tokst) by one.
//
// The next token comes from the recorded (replay) lists when any are
// pending, otherwise from the underlying tokenizer. On failure the error
// flag is set and the function returns without counting a token.
void Parser::getNextToken() {
    // While a backtracking state is active, remember the token being left
    // behind so restoreState() can replay it later.
    if (!keep_tokens.empty())
        keep_tokens.front().first->push_back(tokst);
    // Expectations collected so far belonged to the token being consumed.
    if (st.expected) st.expected->clear();
    if (recorded_tokens.empty()) {
        if (tok->eof()) {
            // The tokenizer is exhausted. Advancing is an error only once the
            // token count has reached the recorded EOF position; otherwise
            // the count is bumped below without fetching anything.
            if (eofCount==st.tokCount) {
                setError();
                return;
            }
        } else if (!tok->getNext(&tokst)) {
            // The tokenizer could not produce a token.
            setError();
            return;
        }
    } else {
        // Replay: take the first token of the front recorded list and drop
        // the list once it has been drained.
        tokst=recorded_tokens.front()->front();
        recorded_tokens.front()->pop_front();
        if (recorded_tokens.front()->empty()) {
            delete recorded_tokens.front();
            recorded_tokens.pop_front();
        }
    }
    st.tokCount++;
    // The first time the tokenizer reports EOF, record the position
    // (tokCount+1) at which a further advance becomes an error.
    if (!eofCount && tok->eof()) eofCount=st.tokCount+1;
};
pushState()122 void Parser::pushState() {
123     keep_tokens.push_front(make_pair(new list<Tokenizer::TokenST>(),st));
124     if (st.expected) st.expected=new list<std::basic_string<wchar_t> >(*st.expected);
125 }
// Backtracks to the most recently pushed state and removes it from the
// stack of saved states.
void Parser::restoreState() {
    assert(!keep_tokens.empty());
    if (keep_tokens.front().first->empty()) {
        // Nothing was consumed since pushState(): just discard the saved
        // copy (its token list and its "expected" list).
        delete keep_tokens.front().first;
        if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
        keep_tokens.pop_front();
    } else {
        // Schedule the consumed tokens for replay. The same list is still
        // the front of keep_tokens, so the getNextToken() call below first
        // appends the current lookahead to it and then reloads tokst with
        // the first consumed token. The order of these statements matters.
        recorded_tokens.push_front(keep_tokens.front().first);
        ParserST st=keep_tokens.front().second;
        getNextToken();
        keep_tokens.pop_front();

        // Replace the live state with the saved one; the saved "expected"
        // list is now owned by this->st.
        if (this->st.expected) delete this->st.expected;
        this->st=st;
    }
}
popState()142 void Parser::popState() {
143     assert(!keep_tokens.empty());
144     if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
145     KeptTokens * l=keep_tokens.front().first;
146     keep_tokens.pop_front();
147     if (!keep_tokens.empty()) keep_tokens.front().first->splice(keep_tokens.front().first->end(),*l);
148     delete l;
149 }
expecting(const std::basic_string<wchar_t> & desc)150 Parser & Parser::expecting(const std::basic_string<wchar_t> & desc) {
151     if (!st.expected) st.expected=new std::list<std::basic_string<wchar_t> >();
152     st.expected->push_back(desc);
153     return *this;
154 };
expecting(const char * desc)155 Parser & Parser::expecting(const char * desc) {
156     return expecting(stows(desc));
157 };
158 
159 
readAnyToken(int * code,std::basic_string<wchar_t> * text)160 Parser & Parser::readAnyToken(int* code,std::basic_string<wchar_t> * text) {
161     if (error())
162         return *this;
163 	if (code)
164 		*code = tokst.code;
165 	if (text) *text=tokst.text;
166     getNextToken();
167     return *this;
168 }
169 
readToken(int code,std::basic_string<wchar_t> * text)170 Parser & Parser::readToken(int code,std::basic_string<wchar_t> * text) {
171     if (tokst.code!=code) {
172         setError();
173         return *this;
174     }
175 	return readAnyToken(NULL,text);
176 };
177 
readToken(int code,std::string * text)178 Parser & Parser::readToken(int code,std::string * text) {
179     basic_string<wchar_t> t;
180     readToken(code,&t);
181     if (text) *text=wstos(t);
182     return *this;
183 };
184 
reachedEof()185 inline bool Parser::reachedEof() {
186     return eofCount<=st.tokCount && tok->eof();
187 }
188 
eof()189 Parser & Parser::eof() {
190     if (!reachedEof()) setError();
191     return *this;
192 };
193 
readAny(wchar_t * c)194 Parser & Parser::readAny(wchar_t * c) {
195     if (tokst.text.length()!=1) {
196         setError();
197         return *this;
198     }
199     if (c) *c=tokst.text[0];
200     getNextToken();
201     return *this;
202 };
203 
readChar(wchar_t c)204 Parser & Parser::readChar(wchar_t c) {
205     if (tokst.text.length()==1 && tokst.text[0]==c)
206         getNextToken();
207     else setError();
208     return *this;
209 };
210 
readstring(std::basic_string<wchar_t> seq)211 Parser & Parser::readstring(std::basic_string<wchar_t> seq) {
212     bool eq=true;
213     int pos=0;
214     while (!error() && int(seq.length())>pos && int(seq.length())-pos>=int(tokst.text.length())
215             && (eq=!seq.substr(pos,tokst.text.length()).compare(0,tokst.text.length(),tokst.text))) {
216         pos+=tokst.text.length();
217         getNextToken();
218     }
219     if (!eq || int(seq.length())>pos) {
220         //tokst.text=seq.substr(0,pos)+tokst.text;
221         setError();
222     }
223     return *this;
224 };
225 
readstring(const char * seq)226 Parser & Parser::readstring(const char * seq) {
227     return readstring(stows(seq));
228 }
229 
230 
ConfigTokenizer(CharTokenizer * ctok)231 ConfigTokenizer::ConfigTokenizer(CharTokenizer *ctok): l(ctok) {
232 };
233 
// Shorthands built from the parser-combinator macros (MANY, NOTFOLLOWBY,
// CHAR, CHARCHECK, PO), which are defined outside this file.
//
// WSNEOLN: repeats a whitespace check guarded by NOTFOLLOWBY(CHAR('\n'))
// -- presumably "skip whitespace but stop before a newline"; TODO confirm
// against the macro definitions.
#define WSNEOLN MANY(NOTFOLLOWBY(CHAR('\n'))CHARCHECK(iswspace))
// ANYCHAR: wraps readAny(NULL) in PO, i.e. reads one character without
// storing it -- presumably applied to the parser object in scope.
#define ANYCHAR PO(readAny(NULL))
236 
// Produces the next token of the configuration syntax into *tok.
//
// Returns false immediately when the lexer is already at end of input;
// otherwise fills in code, line, column and text and returns true unless
// the lexer ended up in an error state.
//
// The body is written with the parser-combinator macros (PARSEbegin, MANY,
// TRY, S, WS, PO, ...), which are defined outside this file; comments on
// their effect are inferred from their names and use here.
bool ConfigTokenizer::getNext(TokenST*tok) {
    if (l.reachedEof()) return false;
    wchar_t c;
    tok->code=TOKUNKNOWN;
    tok->line=l.st.line;
    tok->column=l.st.column;
    // Discard the lines that start with #
    // NOTE(review): the comment above looks stale -- the code below matches
    // "<!--" ... "-->" blocks, not '#' lines.
    PARSEbegin(Lexer,l)
    // Repeatedly match a "<!--" ... "-->" block followed by WS.
    MANYbegin;
        TRY(S("<!--"));
        MANYbegin;
            TRY(NOTFOLLOWBY(S("-->")))ANYCHAR;
            MANY(NOTONEOF("-"));
        MANYend;
        S("-->");
        WS;
    MANYend;

    // Re-stamp the token with the position reached after the skipped blocks.
    tok->code=TOKUNKNOWN;
    tok->line=l.st.line;
    tok->column=l.st.column;
    tok->text=L"";
    // Classify the token by its first character.
    switch (l.getCurrentChar()) {
    case L'<' :
    case L'>':
        // Single-character token, followed by WS.
        PO(readAny(&c));
        tok->text=c;
        WS;
        break;
    case L'/' :
    case L'=':
    case L'?':
        // Single-character token; nothing further is consumed.
        PO(readAny(&c));
        tok->text=c;
        break;
    case L'\"':
        // Token starting with a double quote: read via LATTTEXT.
        LATTTEXT(&tok->text);
        tok->code=TOKATTTEXT;
        break;
    default:
        if (iswspace(l.getCurrentChar())) {
            // Whitespace (WS1) is reported as a token whose text is one space.
            WS1;
            tok->text=L" ";
        } else {
            // Anything else is read as an identifier via ID.
            tok->code=TOKID;
            ID(&tok->text);
        }
    }
    ENDBLOCK;
    PARSEend;
    return !l.error();
};
eof()289 bool ConfigTokenizer::eof() {
290     return l.reachedEof();
291 }
error()292 bool ConfigTokenizer::error() {
293     return l.error();
294 }
295 
296 
Lexer(CharTokenizer * tok)297 Lexer::Lexer(CharTokenizer * tok): tok(tok) {
298     errorflag=false;
299     st.expected=NULL;
300     st.line=1;
301     st.column=0;
302     st.charCount=0;
303     eofCount=0;
304     if (tok->eof()) eofCount=st.charCount+1;
305     getNextToken();
306 };
307 
~Lexer()308 Lexer::~Lexer() {
309     clear();
310 }
311 
operator =(CharTokenizer * tok)312 Lexer & Lexer::operator = (CharTokenizer * tok) {
313     clear();
314     this->tok=tok;
315     st.line=0;
316     st.column=1;
317     st.charCount=0;
318     eofCount=0;
319     if (tok->eof()) eofCount=st.charCount+1;
320     getNextToken();
321     return *this;
322 }
323 
clear()324 void Lexer::clear() {
325     errorflag=false;
326     while (!keep_tokens.empty()) popState();
327     while (!recorded_tokens.empty()) {
328         recorded_tokens.pop_front();
329     }
330     if (st.expected) {
331         delete st.expected;
332         st.expected=NULL;
333     }
334 };
335 
setError()336 void Lexer::setError() {
337     errorflag=true;
338 }
clearError()339 void Lexer::clearError() {
340     errorflag=false;
341 };
error()342 bool Lexer::error() {
343     return errorflag || (recorded_tokens.empty() && tok->error() && !(tok->eof() && st.charCount<eofCount));
344 }
// Advances currentChar by one character and returns the new value.
//
// The character comes from the pending replay strings when there are any,
// otherwise from the underlying character source. Line/column tracking is
// updated after the read.
wchar_t Lexer::getNextToken() {
    // Expectations collected so far belonged to the character being consumed.
    if (st.expected) st.expected->clear();
    // While a backtracking state is active, remember the character being
    // left behind so restoreState() can replay it.
    if (!keep_tokens.empty())
        keep_tokens.front().first+=currentChar;
    // Note the nesting: the inner if/else handles "source at EOF" versus
    // "read from source"; the final else belongs to the outer if and
    // handles replay.
    if (recorded_tokens.empty())
        if (tok->eof()) {
            // Source exhausted. Advancing is an error only once the character
            // count has reached the recorded EOF position; otherwise the count
            // is bumped below without reading anything.
            if (eofCount==st.charCount) {
                setError();
                currentChar=btowc('\0');
                return currentChar;
            }
        } else currentChar=tok->readNext();
    else {
        // Replay: take the first character of the front replay string and
        // drop the string once it is empty.
        currentChar=recorded_tokens.front()[0];
        recorded_tokens.front().erase(0,1);
        if (recorded_tokens.front().length()==0) {
            recorded_tokens.pop_front();
        }
    };
    st.charCount++;
    // The first time the source reports EOF, record the position
    // (charCount+1) at which a further advance becomes an error.
    if (!eofCount && tok->eof()) eofCount=st.charCount+1;
    // Position tracking is skipped once the source reports EOF. A newline
    // starts a new line at column 0; '\r' does not advance the column.
    if (!tok->eof()) {
        if (currentChar==L'\n') {
            st.line++;
            st.column=0;
        } else if (currentChar!=L'\r') st.column++;
    }
    return currentChar;
};
pushState()374 void Lexer::pushState() {
375     keep_tokens.push_front(make_pair(L"",st));
376     if (st.expected) st.expected=new list<std::basic_string<wchar_t> >(*st.expected);
377 }
// Backtracks to the most recently pushed state and removes it from the
// stack of saved states.
void Lexer::restoreState() {
    assert(!keep_tokens.empty());
    if (keep_tokens.front().first.length()==0) {
        // Nothing was consumed since pushState(): discard the saved copy.
        if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
        keep_tokens.pop_front();
    } else {
        // Schedule the consumed characters for replay, followed by the
        // current lookahead character unless the lexer is at EOF.
        recorded_tokens.push_front(keep_tokens.front().first);
        if (!reachedEof()) recorded_tokens.front()+=currentChar;
        LexerST st=keep_tokens.front().second;
        // The saved state is popped before getNextToken() so that the reload
        // below is not recorded into it.
        keep_tokens.pop_front();
        // Reload currentChar with the first replayed character.
        getNextToken();
        // Replace the live state with the saved one; the saved "expected"
        // list is now owned by this->st.
        if (this->st.expected) delete this->st.expected;
        this->st=st;
    }
}
popState()393 void Lexer::popState() {
394     assert(!keep_tokens.empty());
395     if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
396     std::basic_string<wchar_t> s=keep_tokens.front().first;
397     keep_tokens.pop_front();
398     if (!keep_tokens.empty()) keep_tokens.front().first+=s;
399 }
readChar(wchar_t c)400 Lexer & Lexer::readChar(wchar_t c) {
401     if (currentChar!=c || reachedEof()) {
402         setError();
403         return *this;
404     }
405     getNextToken();
406     return *this;
407 };
408 
reachedEof()409 inline bool Lexer::reachedEof() {
410     return eofCount<=st.charCount && tok->eof();
411 };
412 
readAny(wchar_t * c)413 Lexer & Lexer::readAny(wchar_t * c) {
414     if (c) *c=currentChar;
415     getNextToken();
416     return *this;
417 };
418 
eof()419 Lexer & Lexer::eof() {
420     if (!reachedEof()) setError();
421     return *this;
422 };
423 
oneOf(const wchar_t * cs)424 Lexer & Lexer::oneOf(const wchar_t * cs) {
425     for (;*cs!=L'\0' && !error();cs++) if (currentChar==*cs) {
426             getNextToken();
427             return *this;
428         }
429     setError();
430     return *this;
431 };
432 
oneOf(const char * cs)433 Lexer & Lexer::oneOf(const char * cs) {
434     for (;*cs!='\0' && !error();cs++) if (wctob(currentChar)==*cs) {
435             getNextToken();
436             return *this;
437         }
438     setError();
439     return *this;
440 };
441 
notOneOf(const wchar_t * cs)442 Lexer & Lexer::notOneOf(const wchar_t * cs) {
443     for (;*cs!=L'\0';cs++) if (currentChar==*cs) {
444             setError();
445             return *this;
446         }
447     getNextToken();
448     return *this;
449 };
450 
notOneOf(const char * cs)451 Lexer & Lexer::notOneOf(const char * cs) {
452     for (;*cs!='\0';cs++) if (wctob(currentChar)==*cs) {
453             setError();
454             return *this;
455         }
456     getNextToken();
457     return *this;
458 };
459 
readstring(const wchar_t * cs)460 Lexer & Lexer::readstring(const wchar_t * cs) {
461     for (;*cs!='\0' && !error() && currentChar==*cs ;cs++)
462         getNextToken();
463     if (*cs!='\0')
464         setError();
465     return *this;
466 };
467 
readstring(const char * cs)468 Lexer & Lexer::readstring(const char * cs) {
469     return readstring(stows(cs).c_str());
470 }
471 
472 
errorMessage()473 std::string Lexer::errorMessage() {
474     ostringstream temp;
475     string desc;
476     if (reachedEof()) desc="end of input";
477     else if (tok->error()) desc="read error";
478     else desc=currentChar;
479     temp<<"line "<<st.line<<", column "<<st.column<<": ";
480     if (st.expected) {
481         list<basic_string<wchar_t> >::iterator i=st.expected->begin();
482         if (!st.expected->empty()) {
483             temp<<"expecting "<<wstos(*i++);
484             if (i!=st.expected->end()) {
485                 list<basic_string<wchar_t> >::iterator end=--st.expected->end();
486                 for (;i!=end;i++) temp<<", "<<wstos(*i);
487                 temp<<" or "<<wstos(*i);
488             }
489             temp<<" but found: "<<desc;
490         } else temp<<"unexpected: "<<desc;
491     } else temp<<"unexpected: "<<desc;
492     for (list<basic_string<wchar_t> >::iterator i=recorded_tokens.begin();i!=recorded_tokens.end();i++)
493         temp<<wstos(*i);
494     return temp.str();
495 };
496 
readFloat(double * d)497 Lexer & Lexer::readFloat(double * d) {
498     basic_string<wchar_t> str;
499     PARSEbegin(Lexer,*this);
500     INPUT(OPT(CHAR('+')OR CHAR('-')OR POK)MANY1(CHARCHECK(isdigit))
501           OPT(CHAR('.')MANY1(CHARCHECK(isdigit)) OR POK)
502           OPT(CHAR('e')OPT(CHAR('+')OR CHAR('-')OR POK)MANY1(CHARCHECK(isdigit)) OR POK)
503           ,&str);
504     *d=wcstod(str.c_str(),NULL);
505     if (isnan(*d)) PERROR;
506     PARSEend;
507     return *this;
508 }
509 
// Reads an unsigned run of decimal digits and stores its value in *d.
// The matched text is converted with wcstol() in base 10. Returns *this.
//
// PARSEbegin / INPUT / MANY1 / CHARCHECK / PERROR / PARSEend are
// parser-combinator macros defined outside this file.
Lexer & Lexer::readInt(int * d) {
    basic_string<wchar_t> str;
    PARSEbegin(Lexer,*this);
    // Collect one or more digit characters into str.
    INPUT(MANY1(CHARCHECK(iswdigit)),&str);
    const wchar_t * p=str.c_str();
    wchar_t * tail;
    // NOTE(review): wcstol() returns long; the value is narrowed to int
    // without a range check.
    *d=wcstol(p,&tail,10);
    // Anything left unconverted means the text was not a valid integer.
    if (*tail!=L'\0') PERROR;
    PARSEend;
    return *this;
}
521 
expecting(const std::basic_string<wchar_t> & desc)522 Lexer & Lexer::expecting(const std::basic_string<wchar_t> & desc) {
523     if (!st.expected) st.expected=new std::list<std::basic_string<wchar_t> >();
524     st.expected->push_back(desc);
525     return *this;
526 };
527 
expecting(const char * desc)528 Lexer & Lexer::expecting(const char * desc) {
529     return expecting(stows(desc));
530 };
531 
getString()532 std::basic_string<wchar_t> Lexer::getString() {
533     if (keep_tokens.empty()) return L"";
534     else return keep_tokens.front().first;
535 };
536 
537 
538 class PHYSFSFiller : public Filler {
539 private:
540     FILE * f;
541 public:
PHYSFSFiller(FILE * f)542     PHYSFSFiller(FILE *f) : f(f) {}
fill(char * buffer,size_t atmost)543     size_t fill(char * buffer,size_t atmost) {
544         size_t res=fread(buffer,1,atmost,f);
545         if (res<0) return 0;
546         else return res;
547     };
548 };
549 
StreamTokenizer(std::istream & i)550 StreamTokenizer::StreamTokenizer(std::istream & i)
551         : f(new STDStreamFiller(i)),sc(f) {};
552 
StreamTokenizer(FILE * i)553 StreamTokenizer::StreamTokenizer(FILE * i)
554         : f(new PHYSFSFiller(i)),sc(f) {};
555 
~StreamTokenizer()556 StreamTokenizer::~StreamTokenizer() {
557     delete f;
558 }
559 
readNext()560 wchar_t StreamTokenizer::readNext() {
561     wchar_t c;
562     if (!sc.read(&c,1)) {
563         CHERROR<<"StreamTokenizer: wchar_t is not big enough!"<<ENDL;
564         exit(1);
565     };
566     return c;
567 };
568 
eof()569 bool StreamTokenizer::eof() {
570     return sc.eof();
571 };
572 
error()573 bool StreamTokenizer::error() {
574     return sc.error();
575 };
576