1 // Copyright 2009-2021 Intel Corporation 2 // SPDX-License-Identifier: Apache-2.0 3 4 #pragma once 5 6 #include "stream.h" 7 #include <string> 8 #include <vector> 9 10 namespace embree 11 { 12 /*! token class */ 13 class Token 14 { 15 public: 16 17 enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL }; 18 ty(TY_EOF)19 Token ( const ParseLocation& loc = ParseLocation()) : ty(TY_EOF ), loc(loc) {} ty(TY_CHAR)20 Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {} ty(TY_INT)21 Token (int i, const ParseLocation& loc = ParseLocation()) : ty(TY_INT ), i(i), loc(loc) {} ty(TY_FLOAT)22 Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {} ty(ty)23 Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {} 24 Eof()25 static Token Eof() { return Token(); } Sym(std::string str)26 static Token Sym(std::string str) { return Token(str,TY_SYMBOL); } Str(std::string str)27 static Token Str(std::string str) { return Token(str,TY_STRING); } Id(std::string str)28 static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); } 29 Char()30 char Char() const { 31 if (ty == TY_CHAR) return c; 32 THROW_RUNTIME_ERROR(loc.str()+": character expected"); 33 } 34 Int()35 int Int() const { 36 if (ty == TY_INT) return i; 37 THROW_RUNTIME_ERROR(loc.str()+": integer expected"); 38 } 39 40 float Float(bool cast = true) const { 41 if (ty == TY_FLOAT) return f; 42 if (ty == TY_INT && cast) return (float)i; 43 THROW_RUNTIME_ERROR(loc.str()+": float expected"); 44 } 45 Identifier()46 std::string Identifier() const { 47 if (ty == TY_IDENTIFIER) return str; 48 THROW_RUNTIME_ERROR(loc.str()+": identifier expected"); 49 } 50 String()51 std::string String() const { 52 if (ty == TY_STRING) return str; 53 THROW_RUNTIME_ERROR(loc.str()+": string expected"); 54 } 55 Symbol()56 std::string Symbol() const { 57 if (ty == TY_SYMBOL) return str; 58 THROW_RUNTIME_ERROR(loc.str()+": symbol expected"); 59 } 60 Location()61 const ParseLocation& Location() const { return loc; } 62 63 friend bool operator==(const Token& a, const Token& b) 64 { 65 if (a.ty != b.ty) return false; 66 if (a.ty == TY_CHAR) return a.c == b.c; 67 if (a.ty == TY_INT) return a.i == b.i; 68 if (a.ty == TY_FLOAT) return a.f == b.f; 69 if (a.ty == TY_IDENTIFIER) return a.str == b.str; 70 if (a.ty == TY_STRING) return a.str == b.str; 71 if (a.ty == TY_SYMBOL) return a.str == b.str; 72 return true; 73 } 74 75 friend bool operator!=(const Token& a, const Token& b) { 76 return !(a == b); 77 } 78 79 friend bool operator <( const Token& a, const Token& b ) { 80 if (a.ty != b.ty) return (int)a.ty < (int)b.ty; 81 if (a.ty == TY_CHAR) return a.c < b.c; 82 if (a.ty == TY_INT) return a.i < b.i; 83 if (a.ty == TY_FLOAT) return a.f < b.f; 84 if (a.ty == TY_IDENTIFIER) return a.str < b.str; 85 if (a.ty == TY_STRING) return a.str < b.str; 86 if (a.ty == TY_SYMBOL) return a.str < b.str; 87 return false; 88 } 89 90 friend std::ostream& operator<<(std::ostream& cout, const Token& t) 91 { 92 if (t.ty == TY_EOF) return cout << "eof"; 93 if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")"; 94 if (t.ty == TY_INT) return cout << "Int(" << t.i << ")"; 95 if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")"; 96 if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")"; 97 if (t.ty == TY_STRING) return cout << "String(" << t.str << ")"; 98 if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")"; 99 return cout << "unknown"; 100 } 101 102 private: 103 Type ty; //< the type of the token 104 union { 105 char c; //< data for char tokens 106 int i; //< data for int tokens 107 float f; //< data for float tokens 108 }; 109 std::string str; //< data for string and identifier tokens 110 ParseLocation loc; //< the location the token is from 111 }; 112 113 /*! build full tokenizer that takes list of valid characters and keywords */ 114 class TokenStream : public Stream<Token> 115 { 116 public: 117 118 /*! shorthands for common sets of characters */ 119 static const std::string alpha; 120 static const std::string ALPHA; 121 static const std::string numbers; 122 static const std::string separators; 123 static const std::string stringChars; 124 125 public: 126 TokenStream(const Ref<Stream<int> >& cin, 127 const std::string& alpha, //< valid characters for identifiers 128 const std::string& seps, //< characters that act as separators 129 const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols 130 public: location()131 ParseLocation location() { return cin->loc(); } 132 Token next(); 133 bool trySymbol(const std::string& symbol); 134 135 private: 136 void skipSeparators(); 137 bool decDigits(std::string& str); 138 bool decDigits1(std::string& str); 139 bool trySymbols(Token& token, const ParseLocation& loc); 140 bool tryFloat(Token& token, const ParseLocation& loc); 141 bool tryInt(Token& token, const ParseLocation& loc); 142 bool tryString(Token& token, const ParseLocation& loc); 143 bool tryIdentifier(Token& token, const ParseLocation& loc); 144 145 Ref<Stream<int> > cin; 146 bool isSepMap[256]; 147 bool isAlphaMap[256]; 148 bool isStringCharMap[256]; 149 std::vector<std::string> symbols; 150 151 /*! checks if a character is a separator */ isSeparator(unsigned int c)152 __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; } 153 154 /*! checks if a character is a number */ isDigit(unsigned int c)155 __forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; } 156 157 /*! checks if a character is valid inside a string */ isStringChar(unsigned int c)158 __forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; } 159 160 /*! checks if a character is legal for an identifier */ isAlpha(unsigned int c)161 __forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; } isAlphaNum(unsigned int c)162 __forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); } 163 }; 164 } 165