1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef IGA_FRONTEND_PARSER_HPP 10 #define IGA_FRONTEND_PARSER_HPP 11 12 #include "BufferedLexer.hpp" 13 #include "../ErrorHandler.hpp" 14 #include "../Models/Models.hpp" 15 #include "../IR/Loc.hpp" 16 17 18 #include <cstdarg> 19 #include <initializer_list> 20 #include <ostream> 21 #include <sstream> 22 #include <stdexcept> 23 #include <string> 24 #include <utility> 25 #include <vector> 26 27 namespace iga 28 { 29 template<typename T> using IdentMap = 30 std::initializer_list<std::pair<std::string,T>>; 31 template <typename T> Lookup(std::string sym,const IdentMap<T> & M,T orElse)32 static inline T Lookup(std::string sym, const IdentMap<T> &M, T orElse) { 33 for (const auto &e : M) { 34 if (e.first == sym) 35 return e.second; 36 } 37 return orElse; 38 } 39 40 41 // this type is used to bail out of the parsing algorithm upon syntax error 42 struct SyntaxError : std::runtime_error { 43 const Loc loc; 44 std::string message; 45 SyntaxErroriga::SyntaxError46 SyntaxError(const struct Loc &l, const std::string &m) throw () 47 : std::runtime_error(m) 48 , loc(l) 49 , message(m) 50 { 51 } ~SyntaxErroriga::SyntaxError52 ~SyntaxError() { } 53 }; 54 55 /////////////////////////////////////////////////////////////////////////// 56 // Recursive descent parser. 57 // The nomaclaure for method names is roughly: 58 // Looking**** peeks at the token, doesn't consume 59 // Looking**From peeks relative to the lexer's current offset 60 // Consume**** consume next token if some criteria is true 61 // Parse****** generally corresponds to a non-terminal or some 62 // complicated lexemes 63 // 64 // 65 class Parser { 66 protected: 67 BufferedLexer m_lexer; 68 ErrorHandler &m_errorHandler; 69 public: Parser(const std::string & inp,ErrorHandler & errHandler)70 Parser(const std::string &inp, ErrorHandler &errHandler) 71 : m_lexer(inp) 72 , m_errorHandler(errHandler) 73 { 74 } 75 76 ////////////////////////////////////////////////////////////////////// 77 // DEBUGGING 78 // void DumpLookaheads(int n = 1) const {m_lexer.DumpLookaheads(n); } ShowCurrentLexicalContext(std::ostream & os) const79 void ShowCurrentLexicalContext(std::ostream &os) const { 80 ShowCurrentLexicalContext(NextLoc(), os); 81 } 82 void ShowCurrentLexicalContext(const Loc &loc, std::ostream &os) const; 83 84 ////////////////////////////////////////////////////////////////////// 85 // WARNINGS and ERRORS 86 template <typename...Ts> WarningT(Ts...ts)87 void WarningT(Ts...ts) {WarningS(NextLoc(), iga::format(ts...));} 88 template <typename...Ts> WarningAtT(const Loc & loc,Ts...ts)89 void WarningAtT(const Loc &loc, Ts...ts) { 90 WarningS(loc, iga::format(ts...)); 91 } 92 void WarningS(const Loc &loc, const std::string &msg); 93 94 95 template <typename...Ts> ErrorT(Ts...ts)96 void ErrorT(Ts...ts) {ErrorAtS(NextLoc(), iga::format(ts...));} 97 template <typename...Ts> ErrorAtT(const Loc & loc,Ts...ts)98 void ErrorAtT(const Loc &loc, Ts...ts) { 99 ErrorAtS(loc, iga::format(ts...)); 100 } 101 void ErrorAtS(const Loc &loc, const std::string &smsg); 102 103 template <typename...Ts> FailT(Ts...ts)104 void FailT(Ts...ts) {FailS(NextLoc(), iga::format(ts...));} 105 template <typename...Ts> FailAtT(const Loc & loc,Ts...ts)106 void FailAtT(const Loc &loc, Ts...ts) { 107 FailS(loc, iga::format(ts...)); 108 } 109 void FailS(const Loc &loc, const std::string &msg); 110 void FailAfterPrev(const char *msg); 111 112 113 ////////////////////////////////////////////////////////////////////// 114 // BASIC and GENERAL FUNCTIONS Next(int i=0) const115 const Token &Next(int i = 0) const {return m_lexer.Next(i);} 116 NextLoc(int i=0) const117 Loc NextLoc(int i = 0) const {return Next(i).loc;} 118 119 uint32_t ExtentToPrevEnd(const Loc &start) const; 120 121 uint32_t ExtentTo(const Loc &start, const Loc &end) const; 122 EndOfFile() const123 bool EndOfFile() const {return m_lexer.EndOfFile();} 124 Skip(int k=1)125 bool Skip(int k = 1) {return m_lexer.Skip(k);} 126 127 std::string GetTokenAsString(const Token &token) const; GetTokenAsString() const128 std::string GetTokenAsString() const { 129 return GetTokenAsString(Next()); 130 } 131 132 ////////////////////////////////////////////////////////////////////// 133 // QUERYING (non-destructive lookahead) LookingAt(Lexeme lxm) const134 bool LookingAt(Lexeme lxm) const {return LookingAtFrom(0,lxm);} 135 bool LookingAtFrom(int k, Lexeme lxm) const; 136 LookingAtSeq(Lexeme lxm0,Lexeme lxm1) const137 bool LookingAtSeq(Lexeme lxm0, Lexeme lxm1) const {return LookingAtSeq({lxm0,lxm1});} LookingAtSeq(Lexeme lxm0,Lexeme lxm1,Lexeme lxm2) const138 bool LookingAtSeq(Lexeme lxm0, Lexeme lxm1, Lexeme lxm2) const {return LookingAtSeq({lxm0,lxm1,lxm2});} 139 bool LookingAtSeq(std::initializer_list<Lexeme> lxms) const; 140 LookingAtAnyOf(Lexeme lxm0,Lexeme lxm1) const141 bool LookingAtAnyOf(Lexeme lxm0, Lexeme lxm1) const {return LookingAtAnyOf({lxm0,lxm1}); } LookingAtAnyOf(Lexeme lxm0,Lexeme lxm1,Lexeme lxm2) const142 bool LookingAtAnyOf(Lexeme lxm0, Lexeme lxm1, Lexeme lxm2) const {return LookingAtAnyOf({lxm0,lxm1,lxm2}); } 143 bool LookingAtAnyOf(std::initializer_list<Lexeme> lxms) const; 144 bool LookingAtAnyOfFrom(int i, std::initializer_list<Lexeme> lxms) const; 145 146 bool LookingAtPrefix(const char *pfx) const; 147 148 ////////////////////////////////////////////////////////////////////// 149 // CONSUMPTION (destructive lookahead) Consume(Lexeme lxm)150 bool Consume(Lexeme lxm) {return m_lexer.Consume(lxm);} 151 void ConsumeOrFail(Lexeme lxm, const char *msg); 152 // same as above, but the error location chosen is the end of the 153 // previous token; i.e. the suffix is screwed up 154 void ConsumeOrFailAfterPrev(Lexeme lxm, const char *msg); Consume(Lexeme lxm0,Lexeme lxm1)155 bool Consume(Lexeme lxm0, Lexeme lxm1) { 156 // first block doesn't require a label 157 if (LookingAtSeq(lxm0, lxm1)) { 158 return Skip(2); 159 } 160 return false; 161 } 162 163 ////////////////////////////////////////////////////////////////////// 164 // IDENTIFIER and RAW STRING MANIPULATION 165 bool PrefixAtEq(size_t off, const char *pfx) const; 166 167 bool LookingAtIdentEq(const char *eq) const; 168 bool LookingAtIdentEq(int k, const char *eq) const; 169 bool LookingAtIdentEq(const Token &tk, const char *eq) const; 170 bool ConsumeIdentEq(const char *eq); 171 std::string ConsumeIdentOrFail(const char *what = nullptr); // can tell what type of ident optionally; e.g. "op name" 172 173 bool TokenEq(const Token &tk, const char *eq) const; 174 175 template <typename T> IdentLookupFrom(int k,const IdentMap<T> & map,T & value) const176 bool IdentLookupFrom(int k, const IdentMap<T> &map, T &value) const { 177 if (!LookingAtFrom(k, IDENT)) { 178 return false; 179 } 180 for (const auto &p : map) { 181 if (TokenEq(Next(k), p.first.c_str())) { 182 value = p.second; 183 return true; 184 } 185 } 186 return false; 187 } 188 189 template <typename T> ConsumeIdentOneOfOrFail(const IdentMap<T> & map,T & value,const char * errExpecting,const char * errInvalid)190 void ConsumeIdentOneOfOrFail( 191 const IdentMap<T> &map, 192 T &value, 193 const char *errExpecting, 194 const char *errInvalid) 195 { 196 if (!LookingAt(IDENT)) { 197 FailT(errExpecting); 198 } 199 if (!IdentLookupFrom(0, map, value)) { 200 FailT(errInvalid); 201 } 202 Skip(); 203 } 204 205 template <typename T> ConsumeIdentOneOf(const IdentMap<T> & map,T & value)206 bool ConsumeIdentOneOf(const IdentMap<T> &map, T &value) { 207 if (LookingAt(IDENT) && IdentLookupFrom(0, map, value)) { 208 Skip(); 209 return true; 210 } 211 return false; 212 } 213 214 215 /////////////////////////////////////////////////////////////////////////// 216 // NUMBERS 217 // 218 template <typename T> ConsumeIntLit(T & value)219 bool ConsumeIntLit(T &value) { 220 if (LookingAtAnyOf({INTLIT02, INTLIT10, INTLIT16})) { 221 ParseIntFrom(NextLoc(), value); 222 Skip(); 223 return true; 224 } 225 return false; 226 } 227 228 template <typename T> ConsumeIntLitOrFail(T & value,const char * err)229 void ConsumeIntLitOrFail(T &value, const char *err) { 230 if (!ConsumeIntLit(value)) { 231 FailT(err); 232 } 233 } 234 235 // Examples: 236 // 3.141 237 // .451 238 // 3.1e7 239 // 3e9 240 // 3e9.5 241 void ParseFltFrom(const Loc loc, double &value); 242 243 template <typename T> ParseIntFrom(const Loc & loc,T & value)244 void ParseIntFrom(const Loc &loc, T &value) { 245 ParseIntFrom(loc.offset, loc.extent, value); 246 } 247 248 template <typename T> ParseIntFrom(size_t off,size_t len,T & value)249 void ParseIntFrom(size_t off, size_t len, T &value) { 250 const std::string &src = m_lexer.GetSource(); 251 value = 0; 252 if (len > 2 && 253 src[off] == '0' && 254 (src[off + 1] == 'b' || src[off + 1] == 'B')) 255 { 256 for (size_t i = 2; i < len; i++) { 257 char chr = src[off + i]; 258 T next_value = 2 * value + chr - '0'; 259 if (next_value < value) { 260 FailS(-1, "integer literal too large"); 261 } 262 value = next_value; 263 } 264 } else if (len > 2 && 265 src[off] == '0' && 266 (src[off + 1] == 'x' || src[off + 1] == 'X')) 267 { 268 for (size_t i = 2; i < len; i++) { 269 char chr = src[off + i]; 270 char dig = 0; 271 if (chr >= '0' && chr <= '9') 272 dig = chr - '0'; 273 else if (chr >= 'A' && chr <= 'F') 274 dig = chr - 'A' + 10; 275 else if (chr >= 'a' && chr <= 'f') 276 dig = chr - 'a' + 10; 277 T next_value = 16 * value + dig; 278 if (next_value < value) { 279 FailS(-1, "integer literal too large"); 280 } 281 value = next_value; 282 } 283 } else { 284 for (size_t i = 0; i < len; i++) { 285 char chr = src[off + i]; 286 T next_value = 10 * value + chr - '0'; 287 if (next_value < value) { 288 FailS(-1, "integer literal too large"); 289 } 290 value = next_value; 291 } 292 } 293 } 294 }; // Parser 295 } // namespace IGA 296 297 #endif // IGA_FRONTEND_PARSER_HPP 298