1 #ifndef __EO_LEXER_H__
2 #define __EO_LEXER_H__
3
4 #include <setjmp.h>
5
6 #include <Eina.h>
7 #include <Eolian.h>
8
9 #include "eolian_database.h"
10
/* Tokens are plain ints. A single-character token is represented by its
 * ASCII value; the custom token ids below start at START_CUSTOM. */
#define START_CUSTOM 257

enum Tokens
{
   /* operator tokens - NOTE(review): by name these look like
    * ==, !=, >=, <=, &&, ||, << and >>; confirm against the lexer */
   TOK_EQ = START_CUSTOM,
   TOK_NQ,
   TOK_GE,
   TOK_LE,
   TOK_AND,
   TOK_OR,
   TOK_LSH,
   TOK_RSH,

   /* tokens that carry a value (see Eo_Token_Union) */
   TOK_DOC,
   TOK_STRING,
   TOK_CHAR,
   TOK_NUMBER,
   TOK_VALUE
};
22
/* Master list of every eolian keyword, written as an X-macro: whoever
 * expands KEYWORDS must first define KW(), KWAT() and KWH() to decide what
 * each entry turns into.
 *
 * Keywords are not reserved words - they are still TOK_VALUE and can be used
 * as names; a match only fills in the "kw" field of the token.
 *
 * Entry order matters: enum Keywords expands this list, so the position of
 * an entry is its keyword id.
 *
 * reserved for the future: @nullable
 */
#define KEYWORDS \
   /* "regular" keywords */ \
   KW(class), KW(const), KW(enum), KW(return), KW(struct), \
   KW(abstract), KW(c_prefix), KW(composites), KW(constructor), \
   KW(constructors), KW(data), KW(destructor), KW(error), \
   KW(event_c_prefix), KW(events), KW(extends), KW(free), KW(get), \
   KW(implements), KW(import), KW(interface), KW(keys), KW(legacy), \
   KW(methods), KW(mixin), KW(params), KW(parse), KW(parts), KW(ptr), \
   KW(set), KW(type), KW(values), KW(requires), \
   /* @ prefixed keywords */ \
   KWAT(auto), KWAT(beta), KWAT(by_ref), KWAT(c_name), KWAT(const), \
   KWAT(empty), KWAT(extern), KWAT(free), KWAT(hot), KWAT(in), \
   KWAT(inout), KWAT(move), KWAT(no_unused), KWAT(nullable), \
   KWAT(optional), KWAT(out), KWAT(private), KWAT(property), \
   KWAT(protected), KWAT(restart), KWAT(pure_virtual), KWAT(static), \
   /* KWH keyword */ \
   KWH(version), \
   /* the rest keeps the grouping of the original layout; by name these */ \
   /* are builtin type names followed by true/false/null - see */ \
   /* eo_lexer_is_type_keyword() for the authoritative type check */ \
   KW(byte), KW(ubyte), KW(char), KW(short), KW(ushort), KW(int), \
   KW(uint), KW(long), KW(ulong), KW(llong), KW(ullong), \
   KW(int8), KW(uint8), KW(int16), KW(uint16), KW(int32), KW(uint32), \
   KW(int64), KW(uint64), KW(int128), KW(uint128), \
   KW(size), KW(ssize), KW(intptr), KW(uintptr), KW(ptrdiff), \
   KW(time), \
   KW(float), KW(double), \
   KW(bool), \
   KW(slice), KW(rw_slice), \
   KW(void), \
   KW(accessor), KW(array), KW(future), KW(iterator), KW(list), \
   KW(any_value), KW(any_value_ref), KW(binbuf), KW(event), \
   KW(mstring), KW(string), KW(stringshare), KW(strbuf), \
   KW(hash), \
   KW(void_ptr), \
   KW(function), \
   KW(__undefined_type), \
   KW(true), KW(false), KW(null)
72
/* Helpers that turn each KEYWORDS entry into an enumerator name:
 *   KW(x)   -> KW_x       "regular" keyword
 *   KWAT(x) -> KW_at_x    @ prefixed keyword
 *   KWH(x)  -> KW_hash_x
 * They are only needed for the enum below and are undefined right after. */
#define KW(name)   KW_##name
#define KWAT(name) KW_at_##name
#define KWH(name)  KW_hash_##name

enum Keywords
{
   /* 0 is the "kw" value of a token that is not a keyword */
   KW_UNKNOWN = 0,
   KEYWORDS
};

#undef KWH
#undef KWAT
#undef KW
87
/* Kinds of numeric value. The names mirror the numeric members of
 * Eo_Token_Union (i, u, l, ul, ll, ull, f, d).
 * NOTE(review): presumably this tells which union member holds the value
 * of a TOK_NUMBER - confirm in the lexer implementation. */
enum Numbers
{
   NUM_INT = 0,
   NUM_UINT,
   NUM_LONG,
   NUM_ULONG,
   NUM_LLONG,
   NUM_ULLONG,
   NUM_FLOAT,
   NUM_DOUBLE
};
99
100 typedef union
101 {
102 char c;
103 const char *s;
104 signed int i;
105 unsigned int u;
106 signed long l;
107 unsigned long ul;
108 signed long long ll;
109 unsigned long long ull;
110 float f;
111 double d;
112 Eolian_Documentation *doc;
113 } Eo_Token_Union;
114
115 /* a token - "token" is the actual token id, "value" is the value of a token
116 * if needed - NULL otherwise - for example the value of a TOK_VALUE, "kw"
117 * is the keyword id if this is a keyword, it's 0 when not a keyword */
118 typedef struct _Eo_Token
119 {
120 int token, kw;
121 Eo_Token_Union value;
122 } Eo_Token;
123
124 typedef struct _Lexer_Ctx
125 {
126 int line, column;
127 const char *linestr;
128 Eo_Token token;
129 } Lexer_Ctx;
130
131 typedef struct _Eo_Lexer_Dtor
132 {
133 Eina_Free_Cb free_cb;
134 void *data;
135 } Eo_Lexer_Dtor;
136
137 /* keeps all lexer state */
138 typedef struct _Eo_Lexer
139 {
140 /* current character being tested */
141 int current;
142 /* column is token aware column number, for example when lexing a keyword
143 * it points to the beginning of it after the lexing is done, icolumn is
144 * token unaware, always pointing to current column */
145 int column, icolumn;
146 /* the current line number, token aware and unaware */
147 int line_number, iline_number;
148 /* t: "normal" - token to lex into, "lookahead" - a lookahead token, used
149 * to look one token past "t", when we need to check for a token after the
150 * current one and use it in a conditional without consuming the current
151 * token - used in pretty few cases - because we have one extra lookahead
152 * token, that makes our grammar LL(2) - two tokens in total */
153 Eo_Token t, lookahead;
154 /* a string buffer used to keep contents of token currently being read,
155 * if needed at all */
156 Eina_Strbuf *buff;
157 /* a handle pointing to a memory mapped file representing the file we're
158 * currently lexing */
159 Eina_File *handle;
160 /* the source file name */
161 const char *source;
162 /* only basename */
163 const char *filename;
164 /* points to the current character in our mmapped file being lexed, just
165 * incremented until the end */
166 const char *stream;
167 /* end pointer - required to check if we've reached past the file, as
168 * mmapped data will give us no EOF */
169 const char *stream_end;
170 /* points to the current line being lexed, used by error messages to
171 * display the current line with a caret at the respective column */
172 const char *stream_line;
173 /* a pointer to the state this lexer belongs to */
174 Eolian_State *state;
175 /* the unit being filled during current parsing */
176 Eolian_Unit *unit;
177 /* this is jumped to when an error happens */
178 jmp_buf err_jmp;
179
180 /* saved context info */
181 Eina_List *saved_ctxs;
182
183 Eolian_Class *klass;
184 /* a dtor list; dtors can be pushed and popped during
185 * parser execution to simulate scoped resource management
186 *
187 * unpopped dtors (e.g. on error) are run when the state is freed
188 */
189 Eina_List *dtors;
190 /* a node hash; eolian objects can be allocated through this and
191 * they are stored here (with 1 reference) until they're released
192 * into the environment (they also get deref'd)
193 *
194 * if the release never happens, everything is just freed when the state is
195 */
196 Eina_Hash *nodes;
197
198 /* whether we allow lexing expression related tokens */
199 Eina_Bool expr_mode;
200
201 /* decimal point, by default '.' */
202 char decpoint;
203 } Eo_Lexer;
204
/* Lexer error kinds.
 * NOTE(review): how these values are produced and consumed (for example
 * whether they travel through the lexer's err_jmp) is not visible in this
 * header - confirm in the implementation. */
typedef enum _Eo_Lexer_Error
{
   EO_LEXER_ERROR_UNKNOWN = 0,
   EO_LEXER_ERROR_NORMAL  = 1,
   EO_LEXER_ERROR_OOM     = 2
} Eo_Lexer_Error;
211
212 void eo_lexer_init (void);
213 void eo_lexer_shutdown (void);
214 Eo_Lexer *eo_lexer_new (Eolian_State *state, const char *source);
215 void eo_lexer_free (Eo_Lexer *ls);
216 /* gets a regular token, singlechar or one of TOK_something */
217 int eo_lexer_get (Eo_Lexer *ls);
218 /* lookahead token - see Eo_Lexer */
219 int eo_lexer_lookahead (Eo_Lexer *ls);
220 /* "throws" an error, with a custom message and custom token */
221 void eo_lexer_lex_error (Eo_Lexer *ls, const char *msg, int token);
222 /* like above, but uses the lexstate->t.token, a.k.a. current token */
223 void eo_lexer_syntax_error (Eo_Lexer *ls, const char *msg);
224 /* turns the token into a string, writes into the given buffer */
225 void eo_lexer_token_to_str (int token, char *buf);
226 /* returns the string representation of a keyword */
227 const char *eo_lexer_keyword_str_get(int kw);
228 /* checks if the given keyword is a builtin type */
229 Eina_Bool eo_lexer_is_type_keyword(int kw);
230 /* gets a keyword id from the keyword string */
231 int eo_lexer_keyword_str_to_id(const char *kw);
232 /* gets the C type name for a builtin type name - e.g. uchar -> unsigned char */
233 const char *eo_lexer_get_c_type (int kw);
234 /* save, restore and clear context (line, column, line string) */
235 void eo_lexer_context_push (Eo_Lexer *ls);
236 void eo_lexer_context_pop (Eo_Lexer *ls);
237 void eo_lexer_context_restore(Eo_Lexer *ls);
238 void eo_lexer_context_clear (Eo_Lexer *ls);
239
240 /* node ("heap") management */
241 Eolian_Object *eo_lexer_node_new(Eo_Lexer *ls, size_t objsize);
242 Eolian_Object *eo_lexer_node_release(Eo_Lexer *ls, Eolian_Object *obj);
243
244 static inline Eolian_Type *
eo_lexer_type_new(Eo_Lexer * ls)245 eo_lexer_type_new(Eo_Lexer *ls)
246 {
247 return (Eolian_Type *)eo_lexer_node_new(ls, sizeof(Eolian_Type));
248 }
249
250 static inline Eolian_Type *
eo_lexer_type_release(Eo_Lexer * ls,Eolian_Type * tp)251 eo_lexer_type_release(Eo_Lexer *ls, Eolian_Type *tp)
252 {
253 return (Eolian_Type *)eo_lexer_node_release(ls, (Eolian_Object *)tp);
254 }
255
256 static inline Eolian_Typedecl *
eo_lexer_typedecl_new(Eo_Lexer * ls)257 eo_lexer_typedecl_new(Eo_Lexer *ls)
258 {
259 return (Eolian_Typedecl *)eo_lexer_node_new(ls, sizeof(Eolian_Typedecl));
260 }
261
262 static inline Eolian_Typedecl *
eo_lexer_typedecl_release(Eo_Lexer * ls,Eolian_Typedecl * tp)263 eo_lexer_typedecl_release(Eo_Lexer *ls, Eolian_Typedecl *tp)
264 {
265 return (Eolian_Typedecl *)eo_lexer_node_release(ls, (Eolian_Object *)tp);
266 }
267
268 static inline Eolian_Constant *
eo_lexer_constant_new(Eo_Lexer * ls)269 eo_lexer_constant_new(Eo_Lexer *ls)
270 {
271 return (Eolian_Constant *)eo_lexer_node_new(ls, sizeof(Eolian_Constant));
272 }
273
274 static inline Eolian_Constant *
eo_lexer_constant_release(Eo_Lexer * ls,Eolian_Constant * var)275 eo_lexer_constant_release(Eo_Lexer *ls, Eolian_Constant *var)
276 {
277 return (Eolian_Constant *)eo_lexer_node_release(ls, (Eolian_Object *)var);
278 }
279
280 static inline Eolian_Expression *
eo_lexer_expr_new(Eo_Lexer * ls)281 eo_lexer_expr_new(Eo_Lexer *ls)
282 {
283 return (Eolian_Expression *)eo_lexer_node_new(ls, sizeof(Eolian_Expression));
284 }
285
286 static inline Eolian_Expression *
eo_lexer_expr_release(Eo_Lexer * ls,Eolian_Expression * expr)287 eo_lexer_expr_release(Eo_Lexer *ls, Eolian_Expression *expr)
288 {
289 return (Eolian_Expression *)eo_lexer_node_release(ls, (Eolian_Object *)expr);
290 }
291
292 static inline Eolian_Expression *
eo_lexer_expr_release_ref(Eo_Lexer * ls,Eolian_Expression * expr)293 eo_lexer_expr_release_ref(Eo_Lexer *ls, Eolian_Expression *expr)
294 {
295 eolian_object_ref(&expr->base);
296 return eo_lexer_expr_release(ls, expr);
297 }
298
299 static inline Eolian_Error *
eo_lexer_error_new(Eo_Lexer * ls)300 eo_lexer_error_new(Eo_Lexer *ls)
301 {
302 return (Eolian_Error *)eo_lexer_node_new(ls, sizeof(Eolian_Error));
303 }
304
305 static inline Eolian_Error *
eo_lexer_error_release(Eo_Lexer * ls,Eolian_Error * err)306 eo_lexer_error_release(Eo_Lexer *ls, Eolian_Error *err)
307 {
308 return (Eolian_Error *)eo_lexer_node_release(ls, (Eolian_Object *)err);
309 }
310
311 /* "stack" management, only to protect against errors (jumps) in parsing */
312 void eo_lexer_dtor_push(Eo_Lexer *ls, Eina_Free_Cb free_cb, void *data);
313 void eo_lexer_dtor_pop(Eo_Lexer *ls);
314
315
316 #endif /* __EO_LEXER_H__ */
317