1 #ifndef COMPILER_SCANNER_H_INCLUDED 2 #define COMPILER_SCANNER_H_INCLUDED 3 4 #include <cctype> 5 #include <string> 6 #include <iosfwd> 7 #include <vector> 8 #include <sstream> 9 10 #include "tokenloc.hpp" 11 12 namespace Compiler 13 { 14 class ErrorHandler; 15 class Parser; 16 class Extensions; 17 18 /// \brief Scanner 19 /// 20 /// This class translate a char-stream to a token stream (delivered via 21 /// parser-callbacks). 22 23 class MultiChar 24 { 25 public: MultiChar()26 MultiChar() 27 { 28 blank(); 29 } 30 MultiChar(const char ch)31 explicit MultiChar(const char ch) 32 { 33 blank(); 34 mData[0] = ch; 35 36 mLength = getCharLength(ch); 37 } 38 getCharLength(const char ch)39 static int getCharLength(const char ch) 40 { 41 unsigned char c = ch; 42 if (c<=127) return 0; 43 else if ((c & 0xE0) == 0xC0) return 1; 44 else if ((c & 0xF0) == 0xE0) return 2; 45 else if ((c & 0xF8) == 0xF0) return 3; 46 else return -1; 47 } 48 operator ==(const char ch)49 bool operator== (const char ch) 50 { 51 return mData[0]==ch && mData[1]==0 && mData[2]==0 && mData[3]==0; 52 } 53 operator ==(const MultiChar & ch)54 bool operator== (const MultiChar& ch) 55 { 56 return mData[0]==ch.mData[0] && mData[1]==ch.mData[1] && mData[2]==ch.mData[2] && mData[3]==ch.mData[3]; 57 } 58 operator !=(const char ch)59 bool operator!= (const char ch) 60 { 61 return mData[0]!=ch || mData[1]!=0 || mData[2]!=0 || mData[3]!=0; 62 } 63 isWhitespace()64 bool isWhitespace() 65 { 66 return (mData[0]==' ' || mData[0]=='\t') && mData[1]==0 && mData[2]==0 && mData[3]==0; 67 } 68 isDigit()69 bool isDigit() 70 { 71 return std::isdigit(mData[0]) && mData[1]==0 && mData[2]==0 && mData[3]==0; 72 } 73 isMinusSign()74 bool isMinusSign() 75 { 76 if (mData[0] == '-' && mData[1] == 0 && mData[2] == 0 && mData[3] == 0) 77 return true; 78 79 return mData[0] == '\xe2' && mData[1] == '\x80' && mData[2] == '\x93' && mData[3] == 0; 80 } 81 isAlpha()82 bool isAlpha() 83 { 84 if (isMinusSign()) 85 return false; 86 87 return std::isalpha(mData[0]) || mData[1]!=0 || mData[2]!=0 || mData[3]!=0; 88 } 89 appendTo(std::string & str)90 void appendTo(std::string& str) 91 { 92 for (int i = 0; i <= mLength; i++) 93 str += mData[i]; 94 } 95 putback(std::istream & in)96 void putback (std::istream& in) 97 { 98 for (int i = mLength; i >= 0; i--) 99 in.putback (mData[i]); 100 } 101 getFrom(std::istream & in)102 bool getFrom(std::istream& in) 103 { 104 blank(); 105 106 char ch = static_cast<char>(in.peek()); 107 108 if (!in.good()) 109 return false; 110 111 int length = getCharLength(ch); 112 if (length < 0) return false; 113 114 for (int i = 0; i <= length; i++) 115 { 116 in.get (ch); 117 118 if (!in.good()) 119 return false; 120 121 mData[i] = ch; 122 } 123 124 mLength = length; 125 126 return true; 127 } 128 peek(std::istream & in)129 bool peek(std::istream& in) 130 { 131 std::streampos p_orig = in.tellg(); 132 133 char ch = static_cast<char>(in.peek()); 134 135 if (!in.good()) 136 return false; 137 138 int length = getCharLength(ch); 139 if (length < 0) return false; 140 141 for (int i = 0; i <= length; i++) 142 { 143 in.get (ch); 144 145 if (!in.good()) 146 return false; 147 148 mData[i] = ch; 149 } 150 151 mLength = length; 152 153 in.seekg(p_orig); 154 return true; 155 }; 156 blank()157 void blank() 158 { 159 std::fill(std::begin(mData), std::end(mData), '\0'); 160 mLength = -1; 161 } 162 data()163 std::string data() 164 { 165 // NB: mLength is the number of the last element in the array 166 return std::string(mData, mLength + 1); 167 } 168 169 private: 170 char mData[4]{}; 171 int mLength{}; 172 }; 173 174 class Scanner 175 { 176 enum putback_type 177 { 178 Putback_None, Putback_Special, Putback_Integer, Putback_Float, 179 Putback_Name, Putback_Keyword 180 }; 181 182 ErrorHandler& mErrorHandler; 183 TokenLoc mLoc; 184 TokenLoc mPrevLoc; 185 std::istream& mStream; 186 const Extensions *mExtensions; 187 putback_type mPutback; 188 int mPutbackCode; 189 int mPutbackInteger; 190 float mPutbackFloat; 191 std::string mPutbackName; 192 TokenLoc mPutbackLoc; 193 bool mStrictKeywords; 194 bool mTolerantNames; 195 bool mIgnoreNewline; 196 197 public: 198 199 enum keyword 200 { 201 K_begin, K_end, 202 K_short, K_long, K_float, 203 K_if, K_endif, K_else, K_elseif, 204 K_while, K_endwhile, 205 K_return, 206 K_messagebox, 207 K_set, K_to, 208 K_getsquareroot 209 }; 210 211 enum special 212 { 213 S_newline, 214 S_open, S_close, 215 S_cmpEQ, S_cmpNE, S_cmpLT, S_cmpLE, S_cmpGT, S_cmpGE, 216 S_plus, S_minus, S_mult, S_div, 217 S_comma, 218 S_ref, 219 S_member 220 }; 221 222 private: 223 224 // not implemented 225 226 Scanner (const Scanner&); 227 Scanner& operator= (const Scanner&); 228 229 bool get (MultiChar& c); 230 231 void putback (MultiChar& c); 232 233 bool scanToken (Parser& parser); 234 235 bool scanInt (MultiChar& c, Parser& parser, bool& cont); 236 237 bool scanFloat (const std::string& intValue, Parser& parser, bool& cont); 238 239 bool scanName (MultiChar& c, Parser& parser, bool& cont); 240 241 /// \param name May contain the start of the name (one or more characters) 242 bool scanName (std::string& name); 243 244 bool scanSpecial (MultiChar& c, Parser& parser, bool& cont); 245 246 bool isStringCharacter (MultiChar& c, bool lookAhead = true); 247 248 public: 249 250 Scanner (ErrorHandler& errorHandler, std::istream& inputStream, 251 const Extensions *extensions = nullptr); 252 ///< constructor 253 254 void scan (Parser& parser); 255 ///< Scan a token and deliver it to the parser. 256 257 void putbackSpecial (int code, const TokenLoc& loc); 258 ///< put back a special token 259 260 void putbackInt (int value, const TokenLoc& loc); 261 ///< put back an integer token 262 263 void putbackFloat (float value, const TokenLoc& loc); 264 ///< put back a float token 265 266 void putbackName (const std::string& name, const TokenLoc& loc); 267 ///< put back a name token 268 269 void putbackKeyword (int keyword, const TokenLoc& loc); 270 ///< put back a keyword token 271 272 void listKeywords (std::vector<std::string>& keywords); 273 ///< Append all known keywords to \a keywords. 274 275 /// Treat newline character as a part of script command. 276 /// 277 /// \attention This mode lasts only until the next keyword is reached. 278 void enableIgnoreNewlines(); 279 280 /// Do not accept keywords in quotation marks anymore. 281 /// 282 /// \attention This mode lasts only until the next newline is reached. 283 void enableStrictKeywords(); 284 285 /// Continue parsing a name when hitting a '.' or a '-' 286 /// 287 /// \attention This mode lasts only until the next newline is reached. 288 void enableTolerantNames(); 289 }; 290 } 291 292 #endif 293