1 #ifndef COMPILER_SCANNER_H_INCLUDED
2 #define COMPILER_SCANNER_H_INCLUDED
3 
#include <algorithm>
#include <cctype>
#include <iosfwd>
#include <istream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>
9 
10 #include "tokenloc.hpp"
11 
12 namespace Compiler
13 {
14     class ErrorHandler;
15     class Parser;
16     class Extensions;
17 
18     /// \brief Scanner
19     ///
    /// The Scanner class translates a char-stream to a token stream (delivered via
21     /// parser-callbacks).
22 
23     class MultiChar
24     {
25     public:
MultiChar()26         MultiChar()
27         {
28             blank();
29         }
30 
MultiChar(const char ch)31         explicit MultiChar(const char ch)
32         {
33             blank();
34             mData[0] = ch;
35 
36             mLength = getCharLength(ch);
37         }
38 
getCharLength(const char ch)39         static int getCharLength(const char ch)
40         {
41             unsigned char c = ch;
42             if (c<=127) return 0;
43             else if ((c & 0xE0) == 0xC0) return 1;
44             else if ((c & 0xF0) == 0xE0) return 2;
45             else if ((c & 0xF8) == 0xF0) return 3;
46             else return -1;
47         }
48 
operator ==(const char ch)49         bool operator== (const char ch)
50         {
51             return mData[0]==ch && mData[1]==0 && mData[2]==0 && mData[3]==0;
52         }
53 
operator ==(const MultiChar & ch)54         bool operator== (const MultiChar& ch)
55         {
56             return mData[0]==ch.mData[0] && mData[1]==ch.mData[1] && mData[2]==ch.mData[2] && mData[3]==ch.mData[3];
57         }
58 
operator !=(const char ch)59         bool operator!= (const char ch)
60         {
61             return mData[0]!=ch || mData[1]!=0 || mData[2]!=0 || mData[3]!=0;
62         }
63 
isWhitespace()64         bool isWhitespace()
65         {
66             return (mData[0]==' ' || mData[0]=='\t') && mData[1]==0 && mData[2]==0 && mData[3]==0;
67         }
68 
isDigit()69         bool isDigit()
70         {
71             return std::isdigit(mData[0]) && mData[1]==0 && mData[2]==0 && mData[3]==0;
72         }
73 
isMinusSign()74         bool isMinusSign()
75         {
76             if (mData[0] == '-' && mData[1] == 0 && mData[2] == 0 && mData[3] == 0)
77                 return true;
78 
79             return mData[0] == '\xe2' && mData[1] == '\x80' && mData[2] == '\x93' && mData[3] == 0;
80         }
81 
isAlpha()82         bool isAlpha()
83         {
84             if (isMinusSign())
85                 return false;
86 
87             return std::isalpha(mData[0]) || mData[1]!=0 || mData[2]!=0 || mData[3]!=0;
88         }
89 
appendTo(std::string & str)90         void appendTo(std::string& str)
91         {
92             for (int i = 0; i <= mLength; i++)
93                 str += mData[i];
94         }
95 
putback(std::istream & in)96         void putback (std::istream& in)
97         {
98             for (int i = mLength; i >= 0; i--)
99                 in.putback (mData[i]);
100         }
101 
getFrom(std::istream & in)102         bool getFrom(std::istream& in)
103         {
104             blank();
105 
106             char ch = static_cast<char>(in.peek());
107 
108             if (!in.good())
109                 return false;
110 
111             int length = getCharLength(ch);
112             if (length < 0) return false;
113 
114             for (int i = 0; i <= length; i++)
115             {
116                 in.get (ch);
117 
118                 if (!in.good())
119                     return false;
120 
121                 mData[i] = ch;
122             }
123 
124             mLength = length;
125 
126             return true;
127         }
128 
peek(std::istream & in)129         bool peek(std::istream& in)
130         {
131             std::streampos p_orig = in.tellg();
132 
133             char ch = static_cast<char>(in.peek());
134 
135             if (!in.good())
136                 return false;
137 
138             int length = getCharLength(ch);
139             if (length < 0) return false;
140 
141             for (int i = 0; i <= length; i++)
142             {
143                 in.get (ch);
144 
145                 if (!in.good())
146                     return false;
147 
148                 mData[i] = ch;
149             }
150 
151             mLength = length;
152 
153             in.seekg(p_orig);
154             return true;
155         };
156 
blank()157         void blank()
158         {
159             std::fill(std::begin(mData), std::end(mData), '\0');
160             mLength = -1;
161         }
162 
data()163         std::string data()
164         {
165             // NB: mLength is the number of the last element in the array
166             return std::string(mData, mLength + 1);
167         }
168 
169     private:
170         char mData[4]{};
171         int mLength{};
172     };
173 
174     class Scanner
175     {
176             enum putback_type
177             {
178                 Putback_None, Putback_Special, Putback_Integer, Putback_Float,
179                 Putback_Name, Putback_Keyword
180             };
181 
182             ErrorHandler& mErrorHandler;
183             TokenLoc mLoc;
184             TokenLoc mPrevLoc;
185             std::istream& mStream;
186             const Extensions *mExtensions;
187             putback_type mPutback;
188             int mPutbackCode;
189             int mPutbackInteger;
190             float mPutbackFloat;
191             std::string mPutbackName;
192             TokenLoc mPutbackLoc;
193             bool mStrictKeywords;
194             bool mTolerantNames;
195             bool mIgnoreNewline;
196 
197         public:
198 
199             enum keyword
200             {
201                 K_begin, K_end,
202                 K_short, K_long, K_float,
203                 K_if, K_endif, K_else, K_elseif,
204                 K_while, K_endwhile,
205                 K_return,
206                 K_messagebox,
207                 K_set, K_to,
208                 K_getsquareroot
209             };
210 
211             enum special
212             {
213                 S_newline,
214                 S_open, S_close,
215                 S_cmpEQ, S_cmpNE, S_cmpLT, S_cmpLE, S_cmpGT, S_cmpGE,
216                 S_plus, S_minus, S_mult, S_div,
217                 S_comma,
218                 S_ref,
219                 S_member
220             };
221 
222         private:
223 
224         // not implemented
225 
226             Scanner (const Scanner&);
227             Scanner& operator= (const Scanner&);
228 
229             bool get (MultiChar& c);
230 
231             void putback (MultiChar& c);
232 
233             bool scanToken (Parser& parser);
234 
235             bool scanInt (MultiChar& c, Parser& parser, bool& cont);
236 
237             bool scanFloat (const std::string& intValue, Parser& parser, bool& cont);
238 
239             bool scanName (MultiChar& c, Parser& parser, bool& cont);
240 
241             /// \param name May contain the start of the name (one or more characters)
242             bool scanName (std::string& name);
243 
244             bool scanSpecial (MultiChar& c, Parser& parser, bool& cont);
245 
246             bool isStringCharacter (MultiChar& c, bool lookAhead = true);
247 
248         public:
249 
250             Scanner (ErrorHandler& errorHandler, std::istream& inputStream,
251                 const Extensions *extensions = nullptr);
252             ///< constructor
253 
254             void scan (Parser& parser);
255             ///< Scan a token and deliver it to the parser.
256 
257             void putbackSpecial (int code, const TokenLoc& loc);
258             ///< put back a special token
259 
260             void putbackInt (int value, const TokenLoc& loc);
261             ///< put back an integer token
262 
263             void putbackFloat (float value, const TokenLoc& loc);
264             ///< put back a float token
265 
266             void putbackName (const std::string& name, const TokenLoc& loc);
267             ///< put back a name token
268 
269             void putbackKeyword (int keyword, const TokenLoc& loc);
270             ///< put back a keyword token
271 
272             void listKeywords (std::vector<std::string>& keywords);
273             ///< Append all known keywords to \a keywords.
274 
275             /// Treat newline character as a part of script command.
276             ///
277             /// \attention This mode lasts only until the next keyword is reached.
278             void enableIgnoreNewlines();
279 
280             /// Do not accept keywords in quotation marks anymore.
281             ///
282             /// \attention This mode lasts only until the next newline is reached.
283             void enableStrictKeywords();
284 
285             /// Continue parsing a name when hitting a '.' or a '-'
286             ///
287             /// \attention This mode lasts only until the next newline is reached.
288             void enableTolerantNames();
289     };
290 }
291 
292 #endif
293