spacejunk-1.0.5/src/parsercombinators.cpp

/*
  Copyright (C) 2009 Facundo Domínguez

  This file is part of Spacejunk.

  Spacejunk is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Spacejunk is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Spacejunk.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "parsercombinators.h"
#include <sstream>
#include <iomanip>
#include <iostream>
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "debugmsg.h"

using namespace std;


// Builds a parser on top of the tokenizer `tok` and immediately loads the
// first token as look-ahead.
Parser::Parser(Tokenizer * tok): tok(tok) {
    // Start with no "expected" descriptions and zeroed counters.
    st.expected=NULL;
    st.tokCount=0;
    eofCount=0;
    errorflag=false;
    // Prime the look-ahead token; this may already raise the error flag.
    getNextToken();
}

// Frees the heap structures the parser still holds (delegates to clear()).
Parser::~Parser() {
    this->clear();
}

// Re-targets the parser at another tokenizer: drops the current state,
// zeroes the counters and loads the first token of the new source.
Parser & Parser::operator = (Tokenizer * tok) {
    clear();
    eofCount=0;
    st.tokCount=0;
    this->tok=tok;
    getNextToken();
    return *this;
}

// Resets the error flag and releases the expected-list, every saved
// backtracking state and every token list still waiting to be replayed.
void Parser::clear() {
    errorflag=false;
    // Deleting a null pointer is a no-op, so no guard is required.
    delete st.expected;
    st.expected=NULL;
    // popState() frees the saved states one by one.
    while (!keep_tokens.empty()) {
        popState();
    }
    // The replay queue stores owning pointers to token lists.
    for (;!recorded_tokens.empty();recorded_tokens.pop_front()) {
        delete recorded_tokens.front();
    }
}

void Parser::setError() {
    errorflag=true;
}
void Parser::clearError() {
    errorflag=false;
};
bool Parser::error() {
    return errorflag || (recorded_tokens.empty() && tok->error());
}
std::string Parser::errorMessage() {
    ostringstream temp;
    temp<<"line "<<tokst.line<<", column "<<tokst.column<<": ";
    if (st.expected) {
        list<basic_string<wchar_t> >::iterator i=st.expected->begin();
        if (!st.expected->empty()) {
            temp<<"expecting "<<wstos(*i++);
            if (i!=st.expected->end()) {
                list<basic_string<wchar_t> >::iterator end=--st.expected->end();
                for (;i!=end;i++) temp<<", "<<wstos(*i);
                temp<<" or "<<wstos(*i);
            }
            temp<<" but found: "<<wstos(tokst.text);
        } else temp<<"unexpected: "<<wstos(tokst.text);
    } else temp<<"unexpected: "<<wstos(tokst.text);
    for (std::list<std::list<Tokenizer::TokenST> *>::iterator i=recorded_tokens.begin();
            i!=recorded_tokens.end();i++)
        for (std::list<Tokenizer::TokenST>::iterator j=(*i)->begin();j!=(*i)->end();j++)
            temp<<" "<<wstos(j->text);
    return temp.str();
};
void Parser::getNextToken() {
    if (!keep_tokens.empty())
        keep_tokens.front().first->push_back(tokst);
    if (st.expected) st.expected->clear();
    if (recorded_tokens.empty()) {
        if (tok->eof()) {
            if (eofCount==st.tokCount) {
                setError();
                return;
            }
        } else if (!tok->getNext(&tokst)) {
            setError();
            return;
        }
    } else {
        tokst=recorded_tokens.front()->front();
        recorded_tokens.front()->pop_front();
        if (recorded_tokens.front()->empty()) {
            delete recorded_tokens.front();
            recorded_tokens.pop_front();
        }
    }
    st.tokCount++;
    if (!eofCount && tok->eof()) eofCount=st.tokCount+1;
};
void Parser::pushState() {
    keep_tokens.push_front(make_pair(new list<Tokenizer::TokenST>(),st));
    if (st.expected) st.expected=new list<std::basic_string<wchar_t> >(*st.expected);
}
// Rewinds the parser to the most recently pushed state and discards that
// saved state. Requires at least one saved state (asserted).
void Parser::restoreState() {
    assert(!keep_tokens.empty());
    if (keep_tokens.front().first->empty()) {
        // Nothing was consumed since pushState(): just drop the saved state.
        // Note that the live `st` is left untouched in this branch.
        delete keep_tokens.front().first;
        if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
        keep_tokens.pop_front();
    } else {
        // Hand the journal over to the replay queue (same pointer, so
        // ownership moves to recorded_tokens).
        recorded_tokens.push_front(keep_tokens.front().first);
        ParserST st=keep_tokens.front().second;
        // Must run BEFORE pop_front(): getNextToken() appends the current
        // look-ahead token to keep_tokens.front().first, which is the very
        // list just queued for replay, and then reloads tokst from its head.
        getNextToken();
        keep_tokens.pop_front();

        // Replace the live state (including its expected-list) by the saved one.
        if (this->st.expected) delete this->st.expected;
        this->st=st;
    }
}
void Parser::popState() {
    assert(!keep_tokens.empty());
    if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
    KeptTokens * l=keep_tokens.front().first;
    keep_tokens.pop_front();
    if (!keep_tokens.empty()) keep_tokens.front().first->splice(keep_tokens.front().first->end(),*l);
    delete l;
}
// Adds `desc` to the list of things the parser was expecting at this point;
// errorMessage() reports that list. The list is created on first use.
Parser & Parser::expecting(const std::basic_string<wchar_t> & desc) {
    if (st.expected==NULL) {
        st.expected=new std::list<std::basic_string<wchar_t> >();
    }
    st.expected->push_back(desc);
    return *this;
}
// Narrow-string convenience overload: converts `desc` with stows() and
// forwards to the wide-string version.
Parser & Parser::expecting(const char * desc) {
    const std::basic_string<wchar_t> wide=stows(desc);
    return expecting(wide);
}


// Consumes the current token whatever it is. When the parser is already in
// an error state nothing happens. `code` and `text` are optional outputs
// that receive the token's code and text.
Parser & Parser::readAnyToken(int* code,std::basic_string<wchar_t> * text) {
    if (!error()) {
        if (code!=NULL) {
            *code=tokst.code;
        }
        if (text!=NULL) {
            *text=tokst.text;
        }
        getNextToken();
    }
    return *this;
}

// Consumes the current token only if its code equals `code`; otherwise the
// error flag is raised and nothing is consumed. `text` (optional) receives
// the token text on success.
Parser & Parser::readToken(int code,std::basic_string<wchar_t> * text) {
    if (tokst.code==code) {
        return readAnyToken(NULL,text);
    }
    setError();
    return *this;
}

// Narrow-string overload of readToken(): reads into a temporary wide string
// and stores its wstos() conversion in `text` (when given). `text` is
// assigned even if the read failed, in which case it receives the
// conversion of an empty string.
Parser & Parser::readToken(int code,std::string * text) {
    basic_string<wchar_t> wide;
    readToken(code,&wide);
    if (text!=NULL) {
        *text=wstos(wide);
    }
    return *this;
}

// True once the tokenizer is exhausted and the parser has advanced at least
// as far as the token count recorded for end of input.
inline bool Parser::reachedEof() {
    const bool pastLastToken=(eofCount<=st.tokCount);
    return pastLastToken && tok->eof();
}

// Succeeds only at end of input; raises the error flag otherwise.
Parser & Parser::eof() {
    if (reachedEof()) {
        return *this;
    }
    setError();
    return *this;
}

// Consumes the current token provided its text is exactly one character
// long, storing that character in `c` when `c` is non-null. Any other token
// raises the error flag and is left in place.
Parser & Parser::readAny(wchar_t * c) {
    if (tokst.text.length()==1) {
        if (c!=NULL) {
            *c=tokst.text[0];
        }
        getNextToken();
    } else {
        setError();
    }
    return *this;
}

// Consumes the current token when its text is exactly the single character
// `c`; raises the error flag otherwise.
Parser & Parser::readChar(wchar_t c) {
    const bool singleChar=(tokst.text.length()==1);
    if (singleChar && tokst.text[0]==c) {
        getNextToken();
    } else {
        setError();
    }
    return *this;
}

// Consumes consecutive tokens whose texts, concatenated, spell out `seq`.
// Raises the error flag when a token does not match the next piece of
// `seq`, or when the walk stops before all of `seq` was covered (parser
// error, or a token longer than what remains). Tokens that matched before
// the failure stay consumed.
Parser & Parser::readstring(std::basic_string<wchar_t> seq) {
    const int total=int(seq.length());
    int covered=0;
    bool matched=true;
    for (;;) {
        if (error()) break;
        if (covered>=total) break;
        const int piece=int(tokst.text.length());
        // The current token must fit into what is left of seq.
        if (total-covered<piece) break;
        matched=(seq.compare(covered,piece,tokst.text)==0);
        if (!matched) break;
        covered+=piece;
        getNextToken();
    }
    if (!matched || covered<total) {
        setError();
    }
    return *this;
}

// Narrow-string convenience overload: converts `seq` with stows() and
// forwards to the wide-string version.
Parser & Parser::readstring(const char * seq) {
    const std::basic_string<wchar_t> wide=stows(seq);
    return readstring(wide);
}


// Wraps the character source `ctok` in the internal lexer `l`.
ConfigTokenizer::ConfigTokenizer(CharTokenizer *ctok)
        : l(ctok) {}

// Lexer shorthands built from combinator macros defined elsewhere
// (presumably parsercombinators.h -- confirm).
// WSNEOLN: presumably "any run of whitespace characters other than newline".
#define WSNEOLN MANY(NOTFOLLOWBY(CHAR('\n'))CHARCHECK(iswspace))
// ANYCHAR: reads one character via readAny() and discards it.
#define ANYCHAR PO(readAny(NULL))

// Produces the next token of the configuration input into `*tok`.
// Returns false when the lexer is already at end of input, otherwise
// returns whether the lexer is error-free after reading the token.
// The parsing steps are written with combinator macros (PARSEbegin, MANY,
// TRY, S, WS, ...) that are defined outside this file.
bool ConfigTokenizer::getNext(TokenST*tok) {
    if (l.reachedEof()) return false;
    wchar_t c;
    tok->code=TOKUNKNOWN;
    tok->line=l.st.line;
    tok->column=l.st.column;
    // Original note (translated from Spanish): "discard the lines that start
    // with #". NOTE(review): that looks stale -- the block below appears to
    // skip "<!-- ... -->" comments followed by whitespace; confirm against
    // the macro definitions.
    PARSEbegin(Lexer,l)
    MANYbegin;
        TRY(S("<!--"));
        MANYbegin;
            TRY(NOTFOLLOWBY(S("-->")))ANYCHAR;
            MANY(NOTONEOF("-"));
        MANYend;
        S("-->");
        WS;
    MANYend;

    // Re-stamp the token so its position is the one after the skipped part.
    tok->code=TOKUNKNOWN;
    tok->line=l.st.line;
    tok->column=l.st.column;
    tok->text=L"";
    // Classify the token by its first character.
    switch (l.getCurrentChar()) {
    case L'<' :
    case L'>':
        // Single-character token; WS presumably skips trailing whitespace.
        PO(readAny(&c));
        tok->text=c;
        WS;
        break;
    case L'/' :
    case L'=':
    case L'?':
        // Single-character token (the code stays TOKUNKNOWN).
        PO(readAny(&c));
        tok->text=c;
        break;
    case L'\"':
        // Quoted attribute text, read by the LATTTEXT macro.
        LATTTEXT(&tok->text);
        tok->code=TOKATTTEXT;
        break;
    default:
        if (iswspace(l.getCurrentChar())) {
            // Whitespace is reported as a single-space token.
            WS1;
            tok->text=L" ";
        } else {
            // Anything else is read as an identifier by the ID macro.
            tok->code=TOKID;
            ID(&tok->text);
        }
    }
    ENDBLOCK;
    PARSEend;
    return !l.error();
};
// End of input is whatever the internal lexer reports.
bool ConfigTokenizer::eof() {
    const bool atEnd=l.reachedEof();
    return atEnd;
}
bool ConfigTokenizer::error() {
    return l.error();
}


// Builds a lexer over the character source `tok`, positioned at line 1,
// column 0, and loads the first character as look-ahead.
Lexer::Lexer(CharTokenizer * tok): tok(tok) {
    errorflag=false;
    st.expected=NULL;
    st.charCount=0;
    st.column=0;
    st.line=1;
    // A source that is empty from the start reaches end of input at count 1.
    eofCount=tok->eof() ? st.charCount+1 : 0;
    getNextToken();
}

// Frees the heap structures the lexer still holds (delegates to clear()).
Lexer::~Lexer() {
    this->clear();
}

// Re-targets the lexer at another character source: drops the current
// state, resets position and counters and loads the first character.
// The position restarts at line 1, column 0, exactly as in the constructor
// (the two values used to be swapped here, which made diagnostics after a
// re-assignment report wrong coordinates).
Lexer & Lexer::operator = (CharTokenizer * tok) {
    clear();
    this->tok=tok;
    st.line=1;
    st.column=0;
    st.charCount=0;
    eofCount=0;
    // A source that is empty from the start reaches end of input at count 1.
    if (tok->eof()) eofCount=st.charCount+1;
    getNextToken();
    return *this;
}

// Resets the error flag and discards every saved backtracking state, every
// string queued for replay and the expected-list.
void Lexer::clear() {
    errorflag=false;
    while (!keep_tokens.empty()) {
        popState();
    }
    recorded_tokens.clear();
    // Deleting a null pointer is a no-op, so no guard is required.
    delete st.expected;
    st.expected=NULL;
}

void Lexer::setError() {
    errorflag=true;
}
void Lexer::clearError() {
    errorflag=false;
};
bool Lexer::error() {
    return errorflag || (recorded_tokens.empty() && tok->error() && !(tok->eof() && st.charCount<eofCount));
}
// Advances the look-ahead character `currentChar` by one and returns it.
// The outgoing character is first recorded in the innermost saved state (if
// any). The new character comes from the replay queue when that is
// non-empty, otherwise from the character source. Trying to move past the
// end-of-input position raises the error flag and yields btowc('\0').
// Line and column are updated only while the source is not at end of input.
wchar_t Lexer::getNextToken() {
    if (st.expected) {
        st.expected->clear();
    }
    if (!keep_tokens.empty()) {
        keep_tokens.front().first+=currentChar;
    }

    if (!recorded_tokens.empty()) {
        // Replay a character recorded before a backtrack.
        std::basic_string<wchar_t> & replay=recorded_tokens.front();
        currentChar=replay[0];
        replay.erase(0,1);
        if (replay.length()==0) {
            recorded_tokens.pop_front();
        }
    } else if (!tok->eof()) {
        currentChar=tok->readNext();
    } else if (eofCount==st.charCount) {
        // Already standing on the end-of-input position.
        setError();
        currentChar=btowc('\0');
        return currentChar;
    }

    st.charCount++;
    // Remember, once, the character count at which end of input is reached.
    if (!eofCount && tok->eof()) {
        eofCount=st.charCount+1;
    }
    if (!tok->eof()) {
        if (currentChar==L'\n') {
            st.line++;
            st.column=0;
        } else if (currentChar!=L'\r') {
            // Carriage returns do not advance the column.
            st.column++;
        }
    }
    return currentChar;
}
// Saves the current lexer state together with an empty character journal
// so a later restoreState() can rewind to this point.
void Lexer::pushState() {
    keep_tokens.push_front(make_pair(std::basic_string<wchar_t>(),st));
    // The saved state keeps the original expected-list; the live state
    // continues with its own copy.
    if (st.expected!=NULL) {
        st.expected=new list<std::basic_string<wchar_t> >(*st.expected);
    }
}
// Rewinds the lexer to the most recently pushed state and discards that
// saved state. Requires at least one saved state (asserted).
void Lexer::restoreState() {
    assert(!keep_tokens.empty());
    if (keep_tokens.front().first.length()==0) {
        // Nothing was consumed since pushState(): just drop the saved state.
        // The live `st` is left untouched in this branch.
        if (keep_tokens.front().second.expected) delete keep_tokens.front().second.expected;
        keep_tokens.pop_front();
    } else {
        // Queue the consumed characters for replay, followed by the current
        // look-ahead character (unless the lexer sits at end of input).
        recorded_tokens.push_front(keep_tokens.front().first);
        if (!reachedEof()) recorded_tokens.front()+=currentChar;
        LexerST st=keep_tokens.front().second;
        // Reload currentChar from the replay queue while the discarded
        // frame is still on top. getNextToken() journals the outgoing
        // look-ahead into keep_tokens.front(); popping first made that the
        // ENCLOSING frame, which then contained a character that was never
        // consumed and would be replayed (or returned by getString()) out
        // of order. Parser::restoreState() uses this same call order.
        getNextToken();
        keep_tokens.pop_front();
        // Replace the live state (including its expected-list) by the saved one.
        if (this->st.expected) delete this->st.expected;
        this->st=st;
    }
}
// Commits the most recently pushed state: the saved state is discarded and
// the characters journaled since then are appended to the enclosing saved
// state, if there is one. Requires at least one saved state (asserted).
void Lexer::popState() {
    assert(!keep_tokens.empty());
    // Deleting a null expected-list is harmless.
    delete keep_tokens.front().second.expected;
    const std::basic_string<wchar_t> journal=keep_tokens.front().first;
    keep_tokens.pop_front();
    if (!keep_tokens.empty()) {
        keep_tokens.front().first+=journal;
    }
}
// Consumes the current character when it equals `c` and the lexer is not
// at end of input; raises the error flag otherwise.
Lexer & Lexer::readChar(wchar_t c) {
    if (currentChar==c && !reachedEof()) {
        getNextToken();
    } else {
        setError();
    }
    return *this;
}

// True once the source is exhausted and the lexer has advanced at least as
// far as the character count recorded for end of input.
inline bool Lexer::reachedEof() {
    const bool pastLastChar=(eofCount<=st.charCount);
    return pastLastChar && tok->eof();
}

// Consumes the current character unconditionally, copying it to `c` first
// when `c` is non-null. No error or end-of-input check is made here.
Lexer & Lexer::readAny(wchar_t * c) {
    if (c!=NULL) {
        *c=currentChar;
    }
    getNextToken();
    return *this;
}

// Succeeds only at end of input; raises the error flag otherwise.
Lexer & Lexer::eof() {
    if (reachedEof()) {
        return *this;
    }
    setError();
    return *this;
}

// Consumes the current character if it is one of the characters in the
// null-terminated set `cs`. Raises the error flag when none matches or when
// the lexer is (or becomes) in error during the scan.
Lexer & Lexer::oneOf(const wchar_t * cs) {
    while (*cs!=L'\0' && !error()) {
        if (*cs==currentChar) {
            getNextToken();
            return *this;
        }
        ++cs;
    }
    setError();
    return *this;
}

// Narrow-character variant of oneOf(): the current character is converted
// with wctob() and compared against each character of `cs`.
Lexer & Lexer::oneOf(const char * cs) {
    // currentChar does not change during the scan, so convert it once.
    const int narrow=wctob(currentChar);
    while (*cs!='\0' && !error()) {
        if (narrow==*cs) {
            getNextToken();
            return *this;
        }
        ++cs;
    }
    setError();
    return *this;
}

// Consumes the current character unless it appears in the null-terminated
// set `cs`, in which case the error flag is raised instead.
Lexer & Lexer::notOneOf(const wchar_t * cs) {
    while (*cs!=L'\0') {
        if (*cs==currentChar) {
            setError();
            return *this;
        }
        ++cs;
    }
    getNextToken();
    return *this;
}

// Narrow-character variant of notOneOf(): the current character is
// converted with wctob() and must differ from every character of `cs`.
Lexer & Lexer::notOneOf(const char * cs) {
    // currentChar does not change during the scan, so convert it once.
    const int narrow=wctob(currentChar);
    while (*cs!='\0') {
        if (narrow==*cs) {
            setError();
            return *this;
        }
        ++cs;
    }
    getNextToken();
    return *this;
}

// Consumes the characters of the null-terminated string `cs` one by one.
// Stops at the first mismatch or lexer error and raises the error flag if
// `cs` was not matched completely; characters matched so far stay consumed.
Lexer & Lexer::readstring(const wchar_t * cs) {
    while (*cs!='\0' && !error() && currentChar==*cs) {
        getNextToken();
        ++cs;
    }
    const bool matchedAll=(*cs=='\0');
    if (!matchedAll) {
        setError();
    }
    return *this;
}

// Narrow-string convenience overload: converts `cs` with stows() and
// forwards to the wide-string version.
Lexer & Lexer::readstring(const char * cs) {
    const std::basic_string<wchar_t> wide=stows(cs);
    return readstring(wide.c_str());
}


// Formats a diagnostic for the current position:
//   "line L, column C: expecting a, b or c but found: WHAT"
// or "line L, column C: unexpected: WHAT" when nothing was expected.
// WHAT is "end of input", "read error" or the current character. Any text
// still queued for replay is appended verbatim.
std::string Lexer::errorMessage() {
    ostringstream temp;
    string desc;
    if (reachedEof()) desc="end of input";
    else if (tok->error()) desc="read error";
    // Convert the wide character through wstos(), like every other wide
    // string in this message. Assigning the wchar_t directly to a
    // std::string narrowed it to a single char and garbled non-ASCII input.
    else desc=wstos(basic_string<wchar_t>(1,currentChar));
    temp<<"line "<<st.line<<", column "<<st.column<<": ";
    if (st.expected && !st.expected->empty()) {
        list<basic_string<wchar_t> >::iterator i=st.expected->begin();
        temp<<"expecting "<<wstos(*i);
        ++i;
        if (i!=st.expected->end()) {
            // Comma-separate everything but the last entry, which gets "or".
            list<basic_string<wchar_t> >::iterator end=--st.expected->end();
            for (;i!=end;++i) temp<<", "<<wstos(*i);
            temp<<" or "<<wstos(*i);
        }
        temp<<" but found: "<<desc;
    } else {
        temp<<"unexpected: "<<desc;
    }
    for (list<basic_string<wchar_t> >::iterator i=recorded_tokens.begin();i!=recorded_tokens.end();++i)
        temp<<wstos(*i);
    return temp.str();
}

// Reads a decimal floating point literal (optional sign, digits, optional
// fraction, optional 'e' exponent with optional sign) and stores its value
// in `*d`. The matched text is collected by the INPUT macro and converted
// with wcstod(); a NaN result is reported through PERROR.
// The combinator macros used here are defined outside this file.
Lexer & Lexer::readFloat(double * d) {
    basic_string<wchar_t> str;
    PARSEbegin(Lexer,*this);
    // Digits are classified with iswdigit, as in readInt(): the input is
    // wide characters, and isdigit() is undefined for values that do not
    // fit in an unsigned char.
    INPUT(OPT(CHAR('+')OR CHAR('-')OR POK)MANY1(CHARCHECK(iswdigit))
          OPT(CHAR('.')MANY1(CHARCHECK(iswdigit)) OR POK)
          OPT(CHAR('e')OPT(CHAR('+')OR CHAR('-')OR POK)MANY1(CHARCHECK(iswdigit)) OR POK)
          ,&str);
    *d=wcstod(str.c_str(),NULL);
    if (isnan(*d)) PERROR;
    PARSEend;
    return *this;
}

// Reads a run of one or more digits and stores its decimal value in `*d`.
// The matched text is collected by the INPUT macro and converted with
// wcstol(); PERROR is invoked when the conversion did not use all of it.
// The combinator macros used here are defined outside this file.
Lexer & Lexer::readInt(int * d) {
    basic_string<wchar_t> str;
    PARSEbegin(Lexer,*this);
    INPUT(MANY1(CHARCHECK(iswdigit)),&str);
    const wchar_t * p=str.c_str();
    wchar_t * tail;
    // NOTE(review): wcstol() returns long; a value outside the int range is
    // narrowed silently here and ERANGE is not checked -- confirm whether
    // such inputs can occur.
    *d=wcstol(p,&tail,10);
    if (*tail!=L'\0') PERROR;
    PARSEend;
    return *this;
}

// Adds `desc` to the list of things the lexer was expecting at this point;
// errorMessage() reports that list. The list is created on first use.
Lexer & Lexer::expecting(const std::basic_string<wchar_t> & desc) {
    if (st.expected==NULL) {
        st.expected=new std::list<std::basic_string<wchar_t> >();
    }
    st.expected->push_back(desc);
    return *this;
}

// Narrow-string convenience overload: converts `desc` with stows() and
// forwards to the wide-string version.
Lexer & Lexer::expecting(const char * desc) {
    const std::basic_string<wchar_t> wide=stows(desc);
    return expecting(wide);
}

// Returns the characters journaled in the innermost saved state, or an
// empty string when no state is saved.
std::basic_string<wchar_t> Lexer::getString() {
    if (!keep_tokens.empty()) {
        return keep_tokens.front().first;
    }
    return L"";
}


// Filler that pulls raw bytes from a C stdio stream. The stream is not
// closed by this class.
class PHYSFSFiller : public Filler {
private:
    FILE * f;
public:
    PHYSFSFiller(FILE *f) : f(f) {}
    // Copies up to `atmost` bytes from the stream into `buffer` and returns
    // how many were read. A short count (possibly 0) means end of file or a
    // read error.
    size_t fill(char * buffer,size_t atmost) {
        // fread() returns an unsigned count and never a negative value, so
        // the former `res<0` test was dead code and is gone.
        return fread(buffer,1,atmost,f);
    }
};

// Reads characters from a C++ input stream. The filler allocated here is
// released by the destructor.
StreamTokenizer::StreamTokenizer(std::istream & i)
        : f(new STDStreamFiller(i)),
          sc(f) {
}

// Reads characters from a C stdio stream. The filler allocated here is
// released by the destructor.
StreamTokenizer::StreamTokenizer(FILE * i)
        : f(new PHYSFSFiller(i)),
          sc(f) {
}

// Releases the filler created by the constructor.
StreamTokenizer::~StreamTokenizer() {
    delete this->f;
}

wchar_t StreamTokenizer::readNext() {
    wchar_t c;
    if (!sc.read(&c,1)) {
        CHERROR<<"StreamTokenizer: wchar_t is not big enough!"<<ENDL;
        exit(1);
    };
    return c;
};

// End of input is whatever the stream converter reports.
bool StreamTokenizer::eof() {
    const bool atEnd=sc.eof();
    return atEnd;
}

bool StreamTokenizer::error() {
    return sc.error();
};