1 #ifndef __EO_LEXER_H__
2 #define __EO_LEXER_H__
3 
4 #include <setjmp.h>
5 
6 #include <Eina.h>
7 #include <Eolian.h>
8 
9 #include "eolian_database.h"
10 
/* a token is an int, custom tokens start at this - single-char tokens are
 * simply represented by their ascii */
#define START_CUSTOM 257

/* ids of all multi-character tokens; the first one is START_CUSTOM and the
 * rest follow consecutively, so none of them collides with a single-char
 * token */
enum Tokens
{
   /* operator tokens */
   TOK_EQ = START_CUSTOM, TOK_NQ, TOK_GE, TOK_LE,
   TOK_AND, TOK_OR, TOK_LSH, TOK_RSH,

   /* tokens that come with a value */
   TOK_DOC, TOK_STRING, TOK_CHAR, TOK_NUMBER, TOK_VALUE
};
22 
/* all keywords in eolian, they can still be used as names (they're TOK_VALUE)
 * they just fill in the "kw" field of the token
 *
 * reserved for the future: @nullable
 * NOTE(review): KWAT(nullable) is already part of the list below, so the
 * "reserved" remark above may be stale - confirm
 *
 * this is an X-macro style list: KW, KWAT and KWH have to be defined by
 * whoever expands KEYWORDS; the order of the entries determines the values
 * of enum Keywords
 */
#define KEYWORDS KW(class), KW(const), KW(enum), KW(return), KW(struct), \
    \
    KW(abstract), KW(c_prefix), KW(composites), KW(constructor), KW(constructors), \
    KW(data), KW(destructor), KW(error), KW(event_c_prefix), KW(events), KW(extends), \
    KW(free), KW(get), KW(implements), KW(import), KW(interface), \
    KW(keys), KW(legacy), KW(methods), KW(mixin), KW(params), \
    KW(parse), KW(parts), KW(ptr), KW(set), KW(type), KW(values), KW(requires), \
    \
    KWAT(auto), KWAT(beta), KWAT(by_ref), KWAT(c_name), KWAT(const), \
    KWAT(empty), KWAT(extern), KWAT(free), KWAT(hot), KWAT(in), KWAT(inout), \
    KWAT(move), KWAT(no_unused), KWAT(nullable), KWAT(optional), KWAT(out), \
    KWAT(private), KWAT(property), KWAT(protected), KWAT(restart), \
    KWAT(pure_virtual), KWAT(static), \
    \
    KWH(version), \
    \
    KW(byte), KW(ubyte), KW(char), KW(short), KW(ushort), KW(int), KW(uint), \
    KW(long), KW(ulong), KW(llong), KW(ullong), \
    \
    KW(int8), KW(uint8), KW(int16), KW(uint16), KW(int32), KW(uint32), \
    KW(int64), KW(uint64), KW(int128), KW(uint128), \
    \
    KW(size), KW(ssize), KW(intptr), KW(uintptr), KW(ptrdiff), \
    \
    KW(time), \
    \
    KW(float), KW(double), \
    \
    KW(bool), \
    \
    KW(slice), KW(rw_slice), \
    \
    KW(void), \
    \
    KW(accessor), KW(array), KW(future), KW(iterator), KW(list), \
    KW(any_value), KW(any_value_ref), KW(binbuf), KW(event), \
    KW(mstring), KW(string), KW(stringshare), KW(strbuf), \
    \
    KW(hash), \
    KW(void_ptr), \
    KW(function), \
    KW(__undefined_type), \
    \
    KW(true), KW(false), KW(null)

/* "regular" keyword (KW_x), @ prefixed keyword (KW_at_x) and the KWH kind
 * of keyword (KW_hash_x) - each simply pastes a prefix onto the name */
#define KW(x) KW_##x
#define KWAT(x) KW_at_##x
#define KWH(x) KW_hash_##x

/* keyword ids; 0 is reserved for "not a keyword", real keywords are numbered
 * from 1 in the order in which they appear in KEYWORDS */
enum Keywords
{
   KW_UNKNOWN = 0,
   KEYWORDS
};

/* the helpers are only needed to build the enum above; undefine them so that
 * KEYWORDS can be expanded again with different definitions */
#undef KW
#undef KWAT
#undef KWH
87 
/* kinds of numeric values; there is one entry per numeric member of
 * Eo_Token_Union (int, unsigned int, long, unsigned long, long long,
 * unsigned long long, float and double) */
enum Numbers
{
   NUM_INT,
   NUM_UINT,
   NUM_LONG,
   NUM_ULONG,
   NUM_LLONG,
   NUM_ULLONG,
   NUM_FLOAT,
   NUM_DOUBLE
};
99 
100 typedef union
101 {
102    char               c;
103    const    char     *s;
104    signed   int       i;
105    unsigned int       u;
106    signed   long      l;
107    unsigned long      ul;
108    signed   long long ll;
109    unsigned long long ull;
110    float              f;
111    double             d;
112    Eolian_Documentation *doc;
113 } Eo_Token_Union;
114 
115 /* a token - "token" is the actual token id, "value" is the value of a token
116  * if needed - NULL otherwise - for example the value of a TOK_VALUE, "kw"
117  * is the keyword id if this is a keyword, it's 0 when not a keyword */
118 typedef struct _Eo_Token
119 {
120    int token, kw;
121    Eo_Token_Union value;
122 } Eo_Token;
123 
124 typedef struct _Lexer_Ctx
125 {
126    int line, column;
127    const char *linestr;
128    Eo_Token token;
129 } Lexer_Ctx;
130 
131 typedef struct _Eo_Lexer_Dtor
132 {
133    Eina_Free_Cb free_cb;
134    void *data;
135 } Eo_Lexer_Dtor;
136 
137 /* keeps all lexer state */
138 typedef struct _Eo_Lexer
139 {
140    /* current character being tested */
141    int          current;
142    /* column is token aware column number, for example when lexing a keyword
143     * it points to the beginning of it after the lexing is done, icolumn is
144     * token unaware, always pointing to current column */
145    int          column, icolumn;
146    /* the current line number, token aware and unaware */
147    int          line_number, iline_number;
148    /* t: "normal" - token to lex into, "lookahead" - a lookahead token, used
149     * to look one token past "t", when we need to check for a token after the
150     * current one and use it in a conditional without consuming the current
151     * token - used in pretty few cases - because we have one extra lookahead
152     * token, that makes our grammar LL(2) - two tokens in total */
153    Eo_Token     t, lookahead;
154    /* a string buffer used to keep contents of token currently being read,
155     * if needed at all */
156    Eina_Strbuf *buff;
157    /* a handle pointing to a memory mapped file representing the file we're
158     * currently lexing */
159    Eina_File   *handle;
160    /* the source file name */
161    const char  *source;
162    /* only basename */
163    const char  *filename;
164    /* points to the current character in our mmapped file being lexed, just
165     * incremented until the end */
166    const char  *stream;
167    /* end pointer - required to check if we've reached past the file, as
168     * mmapped data will give us no EOF */
169    const char  *stream_end;
170    /* points to the current line being lexed, used by error messages to
171     * display the current line with a caret at the respective column */
172    const char  *stream_line;
173    /* a pointer to the state this lexer belongs to */
174    Eolian_State *state;
175    /* the unit being filled during current parsing */
176    Eolian_Unit *unit;
177    /* this is jumped to when an error happens */
178    jmp_buf      err_jmp;
179 
180    /* saved context info */
181    Eina_List *saved_ctxs;
182 
183    Eolian_Class *klass;
184    /* a dtor list; dtors can be pushed and popped during
185     * parser execution to simulate scoped resource management
186     *
187     * unpopped dtors (e.g. on error) are run when the state is freed
188     */
189    Eina_List *dtors;
190    /* a node hash; eolian objects can be allocated through this and
191     * they are stored here (with 1 reference) until they're released
192     * into the environment (they also get deref'd)
193     *
194     * if the release never happens, everything is just freed when the state is
195     */
196    Eina_Hash *nodes;
197 
198    /* whether we allow lexing expression related tokens */
199    Eina_Bool expr_mode;
200 
201    /* decimal point, by default '.' */
202    char decpoint;
203 } Eo_Lexer;
204 
/* kinds of lexer errors; 0 stands for an unknown error
 * NOTE(review): presumably these are the values delivered through the
 * err_jmp jump buffer of Eo_Lexer - confirm against the implementation */
typedef enum _Eo_Lexer_Error
{
   EO_LEXER_ERROR_UNKNOWN = 0,
   EO_LEXER_ERROR_NORMAL,
   EO_LEXER_ERROR_OOM
} Eo_Lexer_Error;
211 
212 void        eo_lexer_init           (void);
213 void        eo_lexer_shutdown       (void);
214 Eo_Lexer   *eo_lexer_new            (Eolian_State *state, const char *source);
215 void        eo_lexer_free           (Eo_Lexer *ls);
216 /* gets a regular token, singlechar or one of TOK_something */
217 int         eo_lexer_get            (Eo_Lexer *ls);
218 /* lookahead token - see Eo_Lexer */
219 int         eo_lexer_lookahead      (Eo_Lexer *ls);
220 /* "throws" an error, with a custom message and custom token */
221 void        eo_lexer_lex_error      (Eo_Lexer *ls, const char *msg, int token);
222 /* like above, but uses the lexstate->t.token, a.k.a. current token */
223 void        eo_lexer_syntax_error   (Eo_Lexer *ls, const char *msg);
224 /* turns the token into a string, writes into the given buffer */
225 void        eo_lexer_token_to_str   (int token, char *buf);
226 /* returns the string representation of a keyword */
227 const char *eo_lexer_keyword_str_get(int kw);
228 /* checks if the given keyword is a builtin type */
229 Eina_Bool   eo_lexer_is_type_keyword(int kw);
230 /* gets a keyword id from the keyword string */
231 int         eo_lexer_keyword_str_to_id(const char *kw);
232 /* gets the C type name for a builtin type name - e.g. uchar -> unsigned char */
233 const char *eo_lexer_get_c_type     (int kw);
234 /* save, restore and clear context (line, column, line string) */
235 void eo_lexer_context_push   (Eo_Lexer *ls);
236 void eo_lexer_context_pop    (Eo_Lexer *ls);
237 void eo_lexer_context_restore(Eo_Lexer *ls);
238 void eo_lexer_context_clear  (Eo_Lexer *ls);
239 
240 /* node ("heap") management */
241 Eolian_Object *eo_lexer_node_new(Eo_Lexer *ls, size_t objsize);
242 Eolian_Object *eo_lexer_node_release(Eo_Lexer *ls, Eolian_Object *obj);
243 
244 static inline Eolian_Type *
eo_lexer_type_new(Eo_Lexer * ls)245 eo_lexer_type_new(Eo_Lexer *ls)
246 {
247    return (Eolian_Type *)eo_lexer_node_new(ls, sizeof(Eolian_Type));
248 }
249 
250 static inline Eolian_Type *
eo_lexer_type_release(Eo_Lexer * ls,Eolian_Type * tp)251 eo_lexer_type_release(Eo_Lexer *ls, Eolian_Type *tp)
252 {
253    return (Eolian_Type *)eo_lexer_node_release(ls, (Eolian_Object *)tp);
254 }
255 
256 static inline Eolian_Typedecl *
eo_lexer_typedecl_new(Eo_Lexer * ls)257 eo_lexer_typedecl_new(Eo_Lexer *ls)
258 {
259    return (Eolian_Typedecl *)eo_lexer_node_new(ls, sizeof(Eolian_Typedecl));
260 }
261 
262 static inline Eolian_Typedecl *
eo_lexer_typedecl_release(Eo_Lexer * ls,Eolian_Typedecl * tp)263 eo_lexer_typedecl_release(Eo_Lexer *ls, Eolian_Typedecl *tp)
264 {
265    return (Eolian_Typedecl *)eo_lexer_node_release(ls, (Eolian_Object *)tp);
266 }
267 
268 static inline Eolian_Constant *
eo_lexer_constant_new(Eo_Lexer * ls)269 eo_lexer_constant_new(Eo_Lexer *ls)
270 {
271    return (Eolian_Constant *)eo_lexer_node_new(ls, sizeof(Eolian_Constant));
272 }
273 
274 static inline Eolian_Constant *
eo_lexer_constant_release(Eo_Lexer * ls,Eolian_Constant * var)275 eo_lexer_constant_release(Eo_Lexer *ls, Eolian_Constant *var)
276 {
277    return (Eolian_Constant *)eo_lexer_node_release(ls, (Eolian_Object *)var);
278 }
279 
280 static inline Eolian_Expression *
eo_lexer_expr_new(Eo_Lexer * ls)281 eo_lexer_expr_new(Eo_Lexer *ls)
282 {
283    return (Eolian_Expression *)eo_lexer_node_new(ls, sizeof(Eolian_Expression));
284 }
285 
286 static inline Eolian_Expression *
eo_lexer_expr_release(Eo_Lexer * ls,Eolian_Expression * expr)287 eo_lexer_expr_release(Eo_Lexer *ls, Eolian_Expression *expr)
288 {
289    return (Eolian_Expression *)eo_lexer_node_release(ls, (Eolian_Object *)expr);
290 }
291 
292 static inline Eolian_Expression *
eo_lexer_expr_release_ref(Eo_Lexer * ls,Eolian_Expression * expr)293 eo_lexer_expr_release_ref(Eo_Lexer *ls, Eolian_Expression *expr)
294 {
295    eolian_object_ref(&expr->base);
296    return eo_lexer_expr_release(ls, expr);
297 }
298 
299 static inline Eolian_Error *
eo_lexer_error_new(Eo_Lexer * ls)300 eo_lexer_error_new(Eo_Lexer *ls)
301 {
302    return (Eolian_Error *)eo_lexer_node_new(ls, sizeof(Eolian_Error));
303 }
304 
305 static inline Eolian_Error *
eo_lexer_error_release(Eo_Lexer * ls,Eolian_Error * err)306 eo_lexer_error_release(Eo_Lexer *ls, Eolian_Error *err)
307 {
308    return (Eolian_Error *)eo_lexer_node_release(ls, (Eolian_Object *)err);
309 }
310 
311 /* "stack" management, only to protect against errors (jumps) in parsing */
312 void eo_lexer_dtor_push(Eo_Lexer *ls, Eina_Free_Cb free_cb, void *data);
313 void eo_lexer_dtor_pop(Eo_Lexer *ls);
314 
315 
316 #endif /* __EO_LEXER_H__ */
317