1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef IGA_FRONTEND_PARSER_HPP
10 #define IGA_FRONTEND_PARSER_HPP
11 
#include "BufferedLexer.hpp"
#include "../ErrorHandler.hpp"
#include "../Models/Models.hpp"
#include "../IR/Loc.hpp"


#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <limits>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
26 
27 namespace iga
28 {
29     template<typename T> using IdentMap =
30         std::initializer_list<std::pair<std::string,T>>;
31     template <typename T>
Lookup(std::string sym,const IdentMap<T> & M,T orElse)32     static inline T Lookup(std::string sym, const IdentMap<T> &M, T orElse) {
33         for (const auto &e : M) {
34             if (e.first == sym)
35                 return e.second;
36         }
37         return orElse;
38     }
39 
40 
41     // this type is used to bail out of the parsing algorithm upon syntax error
42     struct SyntaxError : std::runtime_error {
43         const Loc loc;
44         std::string message;
45 
SyntaxErroriga::SyntaxError46         SyntaxError(const struct Loc &l, const std::string &m) throw ()
47             : std::runtime_error(m)
48             , loc(l)
49             , message(m)
50         {
51         }
~SyntaxErroriga::SyntaxError52         ~SyntaxError() { }
53     };
54 
55     ///////////////////////////////////////////////////////////////////////////
56     // Recursive descent parser.
57     // The nomaclaure for method names is roughly:
58     //   Looking****      peeks at the token, doesn't consume
59     //   Looking**From    peeks relative to the lexer's current offset
60     //   Consume****      consume next token if some criteria is true
61     //   Parse******      generally corresponds to a non-terminal or some
62     //                    complicated lexemes
63     //
64     //
65     class Parser {
66     protected:
67         BufferedLexer                  m_lexer;
68         ErrorHandler                  &m_errorHandler;
69     public:
Parser(const std::string & inp,ErrorHandler & errHandler)70         Parser(const std::string &inp, ErrorHandler &errHandler)
71             : m_lexer(inp)
72             , m_errorHandler(errHandler)
73         {
74         }
75 
76         //////////////////////////////////////////////////////////////////////
77         // DEBUGGING
78         // void DumpLookaheads(int n = 1) const {m_lexer.DumpLookaheads(n); }
ShowCurrentLexicalContext(std::ostream & os) const79         void ShowCurrentLexicalContext(std::ostream &os) const {
80             ShowCurrentLexicalContext(NextLoc(), os);
81         }
82         void ShowCurrentLexicalContext(const Loc &loc, std::ostream &os) const;
83 
84         //////////////////////////////////////////////////////////////////////
85         // WARNINGS and ERRORS
86         template <typename...Ts>
WarningT(Ts...ts)87         void WarningT(Ts...ts) {WarningS(NextLoc(), iga::format(ts...));}
88         template <typename...Ts>
WarningAtT(const Loc & loc,Ts...ts)89         void WarningAtT(const Loc &loc, Ts...ts) {
90             WarningS(loc, iga::format(ts...));
91         }
92         void WarningS(const Loc &loc, const std::string &msg);
93 
94 
95         template <typename...Ts>
ErrorT(Ts...ts)96         void ErrorT(Ts...ts) {ErrorAtS(NextLoc(), iga::format(ts...));}
97         template <typename...Ts>
ErrorAtT(const Loc & loc,Ts...ts)98         void ErrorAtT(const Loc &loc, Ts...ts) {
99             ErrorAtS(loc, iga::format(ts...));
100         }
101         void ErrorAtS(const Loc &loc, const std::string &smsg);
102 
103         template <typename...Ts>
FailT(Ts...ts)104         void FailT(Ts...ts) {FailS(NextLoc(), iga::format(ts...));}
105         template <typename...Ts>
FailAtT(const Loc & loc,Ts...ts)106         void FailAtT(const Loc &loc, Ts...ts) {
107             FailS(loc, iga::format(ts...));
108         }
109         void FailS(const Loc &loc, const std::string &msg);
110         void FailAfterPrev(const char *msg);
111 
112 
113         //////////////////////////////////////////////////////////////////////
114         // BASIC and GENERAL FUNCTIONS
Next(int i=0) const115         const Token &Next(int i = 0) const {return m_lexer.Next(i);}
116 
NextLoc(int i=0) const117         Loc NextLoc(int i = 0) const {return Next(i).loc;}
118 
119         uint32_t ExtentToPrevEnd(const Loc &start) const;
120 
121         uint32_t ExtentTo(const Loc &start, const Loc &end) const;
122 
EndOfFile() const123         bool EndOfFile() const {return m_lexer.EndOfFile();}
124 
Skip(int k=1)125         bool Skip(int k = 1) {return m_lexer.Skip(k);}
126 
127         std::string GetTokenAsString(const Token &token) const;
GetTokenAsString() const128         std::string GetTokenAsString() const {
129             return GetTokenAsString(Next());
130         }
131 
132         //////////////////////////////////////////////////////////////////////
133         // QUERYING (non-destructive lookahead)
LookingAt(Lexeme lxm) const134         bool LookingAt(Lexeme lxm) const {return LookingAtFrom(0,lxm);}
135         bool LookingAtFrom(int k, Lexeme lxm) const;
136 
LookingAtSeq(Lexeme lxm0,Lexeme lxm1) const137         bool LookingAtSeq(Lexeme lxm0, Lexeme lxm1) const {return LookingAtSeq({lxm0,lxm1});}
LookingAtSeq(Lexeme lxm0,Lexeme lxm1,Lexeme lxm2) const138         bool LookingAtSeq(Lexeme lxm0, Lexeme lxm1, Lexeme lxm2) const {return LookingAtSeq({lxm0,lxm1,lxm2});}
139         bool LookingAtSeq(std::initializer_list<Lexeme> lxms) const;
140 
LookingAtAnyOf(Lexeme lxm0,Lexeme lxm1) const141         bool LookingAtAnyOf(Lexeme lxm0, Lexeme lxm1) const {return LookingAtAnyOf({lxm0,lxm1}); }
LookingAtAnyOf(Lexeme lxm0,Lexeme lxm1,Lexeme lxm2) const142         bool LookingAtAnyOf(Lexeme lxm0, Lexeme lxm1, Lexeme lxm2) const {return LookingAtAnyOf({lxm0,lxm1,lxm2}); }
143         bool LookingAtAnyOf(std::initializer_list<Lexeme> lxms) const;
144         bool LookingAtAnyOfFrom(int i, std::initializer_list<Lexeme> lxms) const;
145 
146         bool LookingAtPrefix(const char *pfx) const;
147 
148         //////////////////////////////////////////////////////////////////////
149         // CONSUMPTION (destructive lookahead)
Consume(Lexeme lxm)150         bool Consume(Lexeme lxm) {return m_lexer.Consume(lxm);}
151         void ConsumeOrFail(Lexeme lxm, const char *msg);
152         // same as above, but the error location chosen is the end of the
153         // previous token; i.e. the suffix is screwed up
154         void ConsumeOrFailAfterPrev(Lexeme lxm, const char *msg);
Consume(Lexeme lxm0,Lexeme lxm1)155         bool Consume(Lexeme lxm0, Lexeme lxm1) {
156             // first block doesn't require a label
157             if (LookingAtSeq(lxm0, lxm1)) {
158                 return Skip(2);
159             }
160             return false;
161         }
162 
163         //////////////////////////////////////////////////////////////////////
164         // IDENTIFIER and RAW STRING MANIPULATION
165         bool PrefixAtEq(size_t off, const char *pfx) const;
166 
167         bool LookingAtIdentEq(const char *eq) const;
168         bool LookingAtIdentEq(int k, const char *eq) const;
169         bool LookingAtIdentEq(const Token &tk, const char *eq) const;
170         bool ConsumeIdentEq(const char *eq);
171         std::string ConsumeIdentOrFail(const char *what = nullptr); // can tell what type of ident optionally; e.g. "op name"
172 
173         bool TokenEq(const Token &tk, const char *eq) const;
174 
175         template <typename T>
IdentLookupFrom(int k,const IdentMap<T> & map,T & value) const176         bool IdentLookupFrom(int k, const IdentMap<T> &map, T &value) const {
177             if (!LookingAtFrom(k, IDENT)) {
178                 return false;
179             }
180             for (const auto &p : map) {
181                 if (TokenEq(Next(k), p.first.c_str())) {
182                     value = p.second;
183                     return true;
184                 }
185             }
186             return false;
187         }
188 
189         template <typename T>
ConsumeIdentOneOfOrFail(const IdentMap<T> & map,T & value,const char * errExpecting,const char * errInvalid)190         void ConsumeIdentOneOfOrFail(
191             const IdentMap<T> &map,
192             T &value,
193             const char *errExpecting,
194             const char *errInvalid)
195         {
196             if (!LookingAt(IDENT)) {
197                 FailT(errExpecting);
198             }
199             if (!IdentLookupFrom(0, map, value)) {
200                 FailT(errInvalid);
201             }
202             Skip();
203         }
204 
205         template <typename T>
ConsumeIdentOneOf(const IdentMap<T> & map,T & value)206         bool ConsumeIdentOneOf(const IdentMap<T> &map, T &value) {
207             if (LookingAt(IDENT) && IdentLookupFrom(0, map, value)) {
208                 Skip();
209                 return true;
210             }
211             return false;
212         }
213 
214 
215         ///////////////////////////////////////////////////////////////////////////
216         // NUMBERS
217         //
218         template <typename T>
ConsumeIntLit(T & value)219         bool ConsumeIntLit(T &value) {
220             if (LookingAtAnyOf({INTLIT02, INTLIT10, INTLIT16})) {
221                 ParseIntFrom(NextLoc(), value);
222                 Skip();
223                 return true;
224             }
225             return false;
226         }
227 
228         template <typename T>
ConsumeIntLitOrFail(T & value,const char * err)229         void ConsumeIntLitOrFail(T &value, const char *err) {
230             if (!ConsumeIntLit(value)) {
231                 FailT(err);
232             }
233         }
234 
235         // Examples:
236         //   3.141
237         //    .451
238         //   3.1e7
239         //   3e9
240         //   3e9.5
241         void ParseFltFrom(const Loc loc, double &value);
242 
243         template <typename T>
ParseIntFrom(const Loc & loc,T & value)244         void ParseIntFrom(const Loc &loc, T &value) {
245             ParseIntFrom(loc.offset, loc.extent, value);
246         }
247 
248         template <typename T>
ParseIntFrom(size_t off,size_t len,T & value)249         void ParseIntFrom(size_t off, size_t len, T &value) {
250             const std::string &src = m_lexer.GetSource();
251             value = 0;
252             if (len > 2 &&
253                 src[off] == '0' &&
254                 (src[off + 1] == 'b' || src[off + 1] == 'B'))
255             {
256                 for (size_t i = 2; i < len; i++) {
257                     char chr = src[off + i];
258                     T next_value = 2 * value + chr - '0';
259                     if (next_value < value) {
260                         FailS(-1, "integer literal too large");
261                     }
262                     value = next_value;
263                 }
264             } else if (len > 2 &&
265                 src[off] == '0' &&
266                 (src[off + 1] == 'x' || src[off + 1] == 'X'))
267             {
268                 for (size_t i = 2; i < len; i++) {
269                     char chr = src[off + i];
270                     char dig = 0;
271                     if (chr >= '0' && chr <= '9')
272                         dig = chr - '0';
273                     else if (chr >= 'A' && chr <= 'F')
274                         dig = chr - 'A' + 10;
275                     else if (chr >= 'a' && chr <= 'f')
276                         dig = chr - 'a' + 10;
277                     T next_value = 16 * value + dig;
278                     if (next_value < value) {
279                         FailS(-1, "integer literal too large");
280                     }
281                     value = next_value;
282                 }
283             } else {
284                 for (size_t i = 0; i < len; i++) {
285                     char chr = src[off + i];
286                     T next_value = 10 * value + chr - '0';
287                     if (next_value < value) {
288                         FailS(-1, "integer literal too large");
289                     }
290                     value = next_value;
291                 }
292             }
293         }
294     }; // Parser
} // namespace iga
296 
297 #endif // IGA_FRONTEND_PARSER_HPP
298