1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #pragma once
5 
#include "stream.h"
#include <string>
#include <utility>
#include <vector>
9 
10 namespace embree
11 {
12   /*! token class */
13   class Token
14   {
15   public:
16 
17     enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL };
18 
ty(TY_EOF)19     Token (        const ParseLocation& loc = ParseLocation()) : ty(TY_EOF  ),       loc(loc) {}
ty(TY_CHAR)20     Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {}
ty(TY_INT)21     Token (int i,  const ParseLocation& loc = ParseLocation()) : ty(TY_INT  ), i(i), loc(loc) {}
ty(TY_FLOAT)22     Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {}
ty(ty)23     Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty),   str(str), loc(loc) {}
24 
Eof()25     static Token Eof()                { return Token(); }
Sym(std::string str)26     static Token Sym(std::string str) { return Token(str,TY_SYMBOL); }
Str(std::string str)27     static Token Str(std::string str) { return Token(str,TY_STRING); }
Id(std::string str)28     static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); }
29 
Char()30     char Char() const {
31       if (ty == TY_CHAR) return c;
32       THROW_RUNTIME_ERROR(loc.str()+": character expected");
33     }
34 
Int()35     int Int() const {
36       if (ty == TY_INT) return i;
37       THROW_RUNTIME_ERROR(loc.str()+": integer expected");
38     }
39 
40     float Float(bool cast = true)  const {
41       if (ty == TY_FLOAT) return f;
42       if (ty == TY_INT && cast) return (float)i;
43       THROW_RUNTIME_ERROR(loc.str()+": float expected");
44     }
45 
Identifier()46     std::string Identifier() const {
47       if (ty == TY_IDENTIFIER) return str;
48       THROW_RUNTIME_ERROR(loc.str()+": identifier expected");
49     }
50 
String()51     std::string String() const {
52       if (ty == TY_STRING) return str;
53       THROW_RUNTIME_ERROR(loc.str()+": string expected");
54     }
55 
Symbol()56     std::string Symbol() const {
57       if (ty == TY_SYMBOL) return str;
58       THROW_RUNTIME_ERROR(loc.str()+": symbol expected");
59     }
60 
Location()61     const ParseLocation& Location() const { return loc; }
62 
63     friend bool operator==(const Token& a, const Token& b)
64     {
65       if (a.ty != b.ty) return false;
66       if (a.ty == TY_CHAR) return a.c == b.c;
67       if (a.ty == TY_INT) return a.i == b.i;
68       if (a.ty == TY_FLOAT) return a.f == b.f;
69       if (a.ty == TY_IDENTIFIER) return a.str == b.str;
70       if (a.ty == TY_STRING) return a.str == b.str;
71       if (a.ty == TY_SYMBOL) return a.str == b.str;
72       return true;
73     }
74 
75     friend bool operator!=(const Token& a, const Token& b) {
76       return !(a == b);
77     }
78 
79     friend bool operator <( const Token& a, const Token& b ) {
80       if (a.ty != b.ty) return (int)a.ty < (int)b.ty;
81       if (a.ty == TY_CHAR) return a.c < b.c;
82       if (a.ty == TY_INT) return a.i < b.i;
83       if (a.ty == TY_FLOAT) return a.f < b.f;
84       if (a.ty == TY_IDENTIFIER) return a.str < b.str;
85       if (a.ty == TY_STRING) return a.str < b.str;
86       if (a.ty == TY_SYMBOL) return a.str < b.str;
87       return false;
88     }
89 
90     friend std::ostream& operator<<(std::ostream& cout, const Token& t)
91     {
92       if (t.ty == TY_EOF) return cout << "eof";
93       if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")";
94       if (t.ty == TY_INT) return cout << "Int(" << t.i << ")";
95       if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")";
96       if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")";
97       if (t.ty == TY_STRING) return cout << "String(" << t.str << ")";
98       if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")";
99       return cout << "unknown";
100     }
101 
102   private:
103     Type ty;            //< the type of the token
104     union {
105       char c;           //< data for char tokens
106       int i;            //< data for int tokens
107       float f;          //< data for float tokens
108     };
109     std::string str;    //< data for string and identifier tokens
110     ParseLocation loc;  //< the location the token is from
111   };
112 
113   /*! build full tokenizer that takes list of valid characters and keywords */
114   class TokenStream : public Stream<Token>
115   {
116   public:
117 
118     /*! shorthands for common sets of characters */
119     static const std::string alpha;
120     static const std::string ALPHA;
121     static const std::string numbers;
122     static const std::string separators;
123     static const std::string stringChars;
124 
125   public:
126     TokenStream(const Ref<Stream<int> >& cin,
127                 const std::string& alpha, //< valid characters for identifiers
128                 const std::string& seps,  //< characters that act as separators
129                 const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols
130   public:
location()131     ParseLocation location() { return cin->loc(); }
132     Token next();
133     bool trySymbol(const std::string& symbol);
134 
135   private:
136     void skipSeparators();
137     bool decDigits(std::string& str);
138     bool decDigits1(std::string& str);
139     bool trySymbols(Token& token, const ParseLocation& loc);
140     bool tryFloat(Token& token, const ParseLocation& loc);
141     bool tryInt(Token& token, const ParseLocation& loc);
142     bool tryString(Token& token, const ParseLocation& loc);
143     bool tryIdentifier(Token& token, const ParseLocation& loc);
144 
145     Ref<Stream<int> > cin;
146     bool isSepMap[256];
147     bool isAlphaMap[256];
148     bool isStringCharMap[256];
149     std::vector<std::string> symbols;
150 
151     /*! checks if a character is a separator */
isSeparator(unsigned int c)152     __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
153 
154     /*! checks if a character is a number */
isDigit(unsigned int c)155     __forceinline bool isDigit(unsigned int c) const {  return c >= '0' && c <= '9'; }
156 
157     /*! checks if a character is valid inside a string */
isStringChar(unsigned int c)158     __forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; }
159 
160     /*! checks if a character is legal for an identifier */
isAlpha(unsigned int c)161     __forceinline bool isAlpha(unsigned int c) const {  return c<256 && isAlphaMap[c];  }
isAlphaNum(unsigned int c)162     __forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); }
163   };
164 }
165