1 #ifdef HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4
5 #include <stdio.h>
6 #include <ctype.h>
7 #include <locale.h>
8
9 #include <setjmp.h>
10 #include <assert.h>
11
12 #include "eo_lexer.h"
13 #include "eolian_priv.h"
14
15 static int lastbytes = 0;
16
17 static void
next_char(Eo_Lexer * ls)18 next_char(Eo_Lexer *ls)
19 {
20 int nb;
21 Eina_Bool end = EINA_FALSE;
22
23 if (ls->stream == ls->stream_end)
24 {
25 end = EINA_TRUE;
26 ls->current = '\0';
27 }
28 else
29 ls->current = *(ls->stream++);
30
31 nb = lastbytes;
32 if (!nb && end) nb = 1;
33 if (!nb) eina_unicode_utf8_next_get(ls->stream - 1, &nb);
34
35 if (nb == 1)
36 {
37 nb = 0;
38 ++ls->icolumn;
39 ls->column = ls->icolumn;
40 }
41 else --nb;
42
43 lastbytes = nb;
44 }
45
46 #define KW(x) #x
47 #define KWAT(x) "@" #x
48 #define KWH(x) "#" #x
49
50 static const char * const tokens[] =
51 {
52 "==", "!=", ">=", "<=", "&&", "||", "<<", ">>",
53 "<doc>", "<string>", "<char>", "<number>", "<value>"
54 };
55
56 static const char * const keywords[] = { KEYWORDS };
57
58 static const char * const ctypes[] =
59 {
60 "signed char", "unsigned char", "char", "short", "unsigned short", "int",
61 "unsigned int", "long", "unsigned long", "long long", "unsigned long long",
62
63 "int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t",
64 "int64_t", "uint64_t", "int128_t", "uint128_t",
65
66 "size_t", "ssize_t", "intptr_t", "uintptr_t", "ptrdiff_t",
67
68 "time_t",
69
70 "float", "double",
71
72 "Eina_Bool",
73
74 "Eina_Slice", "Eina_Rw_Slice",
75
76 "void",
77
78 "Eina_Accessor *", "Eina_Array *", "Eina_Future *", "Eina_Iterator *",
79 "Eina_List *",
80 "Eina_Value", "Eina_Value *", "Eina_Binbuf *", "Efl_Event *",
81 "char *", "const char *", "Eina_Stringshare *", "Eina_Strbuf *",
82
83 "Eina_Hash *",
84 "void *",
85
86 "function",
87 };
88
89 #undef KW
90 #undef KWAT
91 #undef KWH
92
93 #define is_newline(c) ((c) == '\n' || (c) == '\r')
94
95 static Eina_Hash *keyword_map = NULL;
96
97 static void
throw(Eo_Lexer * ls,const char * fmt,...)98 throw(Eo_Lexer *ls, const char *fmt, ...)
99 {
100 const char *ln = ls->stream_line, *end = ls->stream_end;
101 Eina_Strbuf *buf = eina_strbuf_new();
102 int i;
103 va_list ap;
104 va_start(ap, fmt);
105 eina_strbuf_append_vprintf(buf, fmt, ap);
106 va_end(ap);
107 eina_strbuf_append(buf, "\n ");
108 while (ln != end && !is_newline(*ln))
109 eina_strbuf_append_char(buf,*(ln++));
110 eina_strbuf_append_char(buf, '\n');
111 for (i = 0; i < ls->column; ++i)
112 eina_strbuf_append_char(buf, ' ');
113 eina_strbuf_append(buf, "^\n");
114 Eolian_Object tmp;
115 memset(&tmp, 0, sizeof(Eolian_Object));
116 tmp.unit = ls->unit;
117 tmp.file = ls->source;
118 tmp.line = ls->line_number;
119 tmp.column = ls->column;
120 eolian_state_log_obj(ls->state, &tmp, "%s", eina_strbuf_string_get(buf));
121 eina_strbuf_free(buf);
122 longjmp(ls->err_jmp, EO_LEXER_ERROR_NORMAL);
123 }
124
125 void
eo_lexer_init(void)126 eo_lexer_init(void)
127 {
128 unsigned int i;
129 if (keyword_map) return;
130 keyword_map = eina_hash_string_superfast_new(NULL);
131 for (i = 0; i < (sizeof(keywords) / sizeof(keywords[0])); ++i)
132 eina_hash_add(keyword_map, keywords[i], (void *)(size_t)(i + 1));
133 }
134
135 void
eo_lexer_shutdown(void)136 eo_lexer_shutdown(void)
137 {
138 if (keyword_map)
139 {
140 eina_hash_free(keyword_map);
141 keyword_map = NULL;
142 }
143 }
144
145 static void
txt_token(Eo_Lexer * ls,int token,char * buf)146 txt_token(Eo_Lexer *ls, int token, char *buf)
147 {
148 if (token == TOK_VALUE)
149 memcpy(buf, ls->t.value.s, strlen(ls->t.value.s) + 1);
150 else
151 return eo_lexer_token_to_str(token, buf);
152 }
153
154 void eo_lexer_lex_error (Eo_Lexer *ls, const char *msg, int token);
155 void eo_lexer_syntax_error(Eo_Lexer *ls, const char *msg);
156
/*
 * Consume a newline and start a new line.
 *
 * Must be called with ls->current on '\n' or '\r'. A following, different
 * newline byte ("\r\n" or "\n\r") is consumed as part of the same line break.
 * Afterwards ls->current holds the first character of the new line,
 * ls->stream_line points at the start of that line, the line counters are
 * incremented and the column counters are reset to 0.
 *
 * ls->stream always points one byte past ls->current, so the start of the new
 * line has to be recorded *before* its first character is fetched. The
 * original recorded it after the second next_char() for two-byte newlines,
 * which made stream_line (used by throw() to print the offending line) skip
 * the first character of the line.
 *
 * Raises a syntax error when the line counter reaches INT_MAX.
 */
static void
next_line(Eo_Lexer *ls)
{
   const int first = ls->current;

   assert(is_newline(ls->current));

   ls->stream_line = ls->stream;
   next_char(ls);
   if (is_newline(ls->current) && ls->current != first)
     {
        /* second half of a two-byte newline: the line starts after it */
        ls->stream_line = ls->stream;
        next_char(ls);
     }

   if (++ls->iline_number >= INT_MAX)
     eo_lexer_syntax_error(ls, "chunk has too many lines");
   ls->line_number = ls->iline_number;
   ls->icolumn = ls->column = 0;
}
173
/*
 * Skip whitespace on the current line.
 * Stops at the first character that is not whitespace, or at a newline.
 */
static void
skip_ws(Eo_Lexer *ls)
{
   for (;;)
     {
        if (is_newline(ls->current) || !isspace(ls->current))
          return;
        next_char(ls);
     }
}
179
180 /* go to next line and strip leading whitespace */
next_line_ws(Eo_Lexer * ls)181 static void next_line_ws(Eo_Lexer *ls)
182 {
183 next_line(ls);
184 skip_ws(ls);
185 }
186
187 static Eina_Bool
should_skip_star(Eo_Lexer * ls,int ccol,Eina_Bool * term)188 should_skip_star(Eo_Lexer *ls, int ccol, Eina_Bool *term)
189 {
190 Eina_Bool had_star = EINA_FALSE;
191 if (ls->column == ccol && ls->current == '*')
192 {
193 had_star = EINA_TRUE;
194 next_char(ls);
195 if (ls->current == '/')
196 {
197 next_char(ls);
198 *term = EINA_TRUE;
199 return EINA_FALSE;
200 }
201 skip_ws(ls);
202 }
203 return had_star;
204 }
205
206 static void
read_long_comment(Eo_Lexer * ls,int ccol)207 read_long_comment(Eo_Lexer *ls, int ccol)
208 {
209 Eina_Bool had_star = EINA_FALSE, had_nl = EINA_FALSE;
210 eina_strbuf_reset(ls->buff);
211
212 if (is_newline(ls->current))
213 {
214 Eina_Bool term = EINA_FALSE;
215 had_nl = EINA_TRUE;
216 next_line_ws(ls);
217 had_star = should_skip_star(ls, ccol, &term);
218 if (term) goto cend;
219 }
220
221 for (;;)
222 {
223 if (!ls->current)
224 eo_lexer_lex_error(ls, "unfinished long comment", -1);
225 if (ls->current == '*')
226 {
227 next_char(ls);
228 if (ls->current == '/')
229 {
230 next_char(ls);
231 break;
232 }
233 eina_strbuf_append_char(ls->buff, '*');
234 }
235 else if (is_newline(ls->current))
236 {
237 eina_strbuf_append_char(ls->buff, '\n');
238 next_line_ws(ls);
239 if (!had_nl)
240 {
241 Eina_Bool term = EINA_FALSE;
242 had_nl = EINA_TRUE;
243 had_star = should_skip_star(ls, ccol, &term);
244 if (term) break;
245 }
246 else if (had_star && ls->column == ccol && ls->current == '*')
247 {
248 next_char(ls);
249 if (ls->current == '/')
250 {
251 next_char(ls);
252 break;
253 }
254 skip_ws(ls);
255 }
256 }
257 else
258 {
259 eina_strbuf_append_char(ls->buff, ls->current);
260 next_char(ls);
261 }
262 }
263 cend:
264 eina_strbuf_trim(ls->buff);
265 }
266
/* Results of the documentation sub-lexers (doc_lex() and read_since()). */
enum Doc_Tokens
{
   DOC_MANGLED    = -2, /* text after a since tag is not followed by "]]" */
   DOC_UNFINISHED = -1, /* input ended, or the since tag had no value */
   DOC_TEXT       =  0, /* a chunk of documentation text is in ls->buff */
   DOC_SINCE      =  1  /* the value of a since tag is in ls->buff */
};
270
271 static void
doc_ref_class(Eo_Lexer * ls,const char * cname)272 doc_ref_class(Eo_Lexer *ls, const char *cname)
273 {
274 size_t clen = strlen(cname);
275 char *buf = alloca(clen + 4);
276 memcpy(buf, cname, clen);
277 buf[clen] = '\0';
278 for (char *p = buf; *p; ++p)
279 {
280 if (*p == '.')
281 *p = '_';
282 else
283 *p = tolower(*p);
284 }
285 memcpy(buf + clen, ".eo", sizeof(".eo"));
286 if (!eina_hash_find(ls->state->filenames_eo, buf))
287 return;
288 /* ref'd classes do not become dependencies */
289 database_defer(ls->state, buf, EINA_FALSE);
290 }
291
292 static void
doc_ref(Eo_Lexer * ls,Eolian_Documentation * doc)293 doc_ref(Eo_Lexer *ls, Eolian_Documentation *doc)
294 {
295 const char *st = ls->stream, *ste = ls->stream_end;
296 size_t rlen = 0;
297 while ((st != ste) && ((*st == '.') || (*st == '_') || isalnum(*st)))
298 {
299 ++st;
300 ++rlen;
301 }
302 if ((rlen > 1) && (*(st - 1) == '.'))
303 --rlen;
304 if (!rlen)
305 return;
306 if (*ls->stream == '.')
307 return;
308
309 char *buf = alloca(rlen + 1);
310 memcpy(buf, ls->stream, rlen);
311 buf[rlen] = '\0';
312
313 /* actual full class name */
314 doc_ref_class(ls, buf);
315
316 /* it's definitely a reference, add debug info
317 * 20 bits for line and 12 bits for column, good enough
318 */
319 doc->ref_dbg = eina_list_append(doc->ref_dbg,
320 (void *)(size_t)((ls->line_number & 0xFFFFF) | (((ls->column + 1) & 0xFFF) << 20)));
321
322 /* method name at the end */
323 char *end = strrchr(buf, '.');
324 if (!end)
325 return;
326 *end = '\0';
327 doc_ref_class(ls, buf);
328
329 /* .get or .set at the end, handle possible property */
330 if (strcmp(end + 1, "get") && strcmp(end + 1, "set"))
331 return;
332 end = strrchr(buf, '.');
333 if (!end)
334 return;
335 *end = '\0';
336 doc_ref_class(ls, buf);
337 }
338
339 static int
doc_lex(Eo_Lexer * ls,Eolian_Documentation * doc,Eina_Bool * term,Eina_Bool * since)340 doc_lex(Eo_Lexer *ls, Eolian_Documentation *doc, Eina_Bool *term, Eina_Bool *since)
341 {
342 int tokret = -1;
343 eina_strbuf_reset(ls->buff);
344 *since = EINA_FALSE;
345 for (;;) switch (ls->current)
346 {
347 /* error case */
348 case '\0':
349 return DOC_UNFINISHED;
350 /* newline case: if two or more newlines are present, new paragraph
351 * if only one newline is present, append space to the text buffer
352 * when starting new paragraph, reset doc continutation
353 */
354 case '\n':
355 case '\r':
356 next_line(ls);
357 skip_ws(ls);
358 if (!is_newline(ls->current))
359 {
360 eina_strbuf_append_char(ls->buff, ' ');
361 continue;
362 }
363 while (is_newline(ls->current))
364 next_line_ws(ls);
365 tokret = DOC_TEXT;
366 goto exit_with_token;
367 /* escape case: for any \X, output \X
368 * except for \\]], then output just ]]
369 */
370 case '\\':
371 next_char(ls);
372 if (ls->current == ']')
373 {
374 next_char(ls);
375 if (ls->current == ']')
376 {
377 next_char(ls);
378 eina_strbuf_append(ls->buff, "]]");
379 }
380 else
381 eina_strbuf_append(ls->buff, "\\]");
382 }
383 else
384 eina_strbuf_append_char(ls->buff, '\\');
385 continue;
386 /* terminating case */
387 case ']':
388 next_char(ls);
389 if (ls->current == ']')
390 {
391 /* terminate doc */
392 tokret = DOC_TEXT;
393 goto terminated;
394 }
395 eina_strbuf_append_char(ls->buff, ']');
396 continue;
397 /* references and @since */
398 case '@':
399 if ((size_t)(ls->stream_end - ls->stream) >= (sizeof("since")) &&
400 !memcmp(ls->stream, "since ", sizeof("since")))
401 {
402 next_char(ls);
403 *since = EINA_TRUE;
404 for (size_t i = 0; i < sizeof("since"); ++i)
405 next_char(ls);
406 skip_ws(ls);
407 tokret = DOC_TEXT;
408 goto exit_with_token;
409 }
410 doc_ref(ls, doc);
411 eina_strbuf_append_char(ls->buff, '@');
412 next_char(ls);
413 /* in-class references */
414 if (ls->klass && ls->current == '.')
415 {
416 next_char(ls);
417 if (isalpha(ls->current) || ls->current == '_')
418 eina_strbuf_append(ls->buff, ls->klass->base.name);
419 eina_strbuf_append_char(ls->buff, '.');
420 }
421 continue;
422 /* default case - append character */
423 default:
424 eina_strbuf_append_char(ls->buff, ls->current);
425 next_char(ls);
426 continue;
427 }
428 terminated:
429 next_char(ls);
430 *term = EINA_TRUE;
431 exit_with_token:
432 eina_strbuf_trim(ls->buff);
433 return tokret;
434 }
435
436 static int
read_since(Eo_Lexer * ls)437 read_since(Eo_Lexer *ls)
438 {
439 eina_strbuf_reset(ls->buff);
440 while (ls->current && (ls->current == '.' ||
441 ls->current == '_' ||
442 isalnum(ls->current)))
443 {
444 eina_strbuf_append_char(ls->buff, ls->current);
445 next_char(ls);
446 }
447 if (!eina_strbuf_length_get(ls->buff))
448 return DOC_UNFINISHED;
449 skip_ws(ls);
450 while (is_newline(ls->current))
451 next_line_ws(ls);
452 if (ls->current != ']')
453 return DOC_MANGLED;
454 next_char(ls);
455 if (ls->current != ']')
456 return DOC_MANGLED;
457 next_char(ls);
458 return DOC_SINCE;
459 }
460
/*
 * Abort documentation parsing.
 *
 * Releases the partially built documentation object (summary, description,
 * reference list and the object itself) and the scratch buffer, then raises
 * msg as a lexer error through eo_lexer_lex_error().
 */
void
doc_error(Eo_Lexer *ls, const char *msg, Eolian_Documentation *doc, Eina_Strbuf *buf)
{
   /* scratch buffer first, it is independent of the doc object */
   eina_strbuf_free(buf);

   eina_stringshare_del(doc->summary);
   eina_stringshare_del(doc->description);
   eina_list_free(doc->ref_dbg);
   free(doc);

   eo_lexer_lex_error(ls, msg, -1);
}
470
471 static void
read_doc(Eo_Lexer * ls,Eo_Token * tok,int line,int column)472 read_doc(Eo_Lexer *ls, Eo_Token *tok, int line, int column)
473 {
474 Eolian_Documentation *doc = calloc(1, sizeof(Eolian_Documentation));
475 if (!doc)
476 longjmp(ls->err_jmp, EO_LEXER_ERROR_OOM);
477
478 doc->base.unit = ls->unit;
479 doc->base.file = ls->filename;
480 doc->base.line = line;
481 doc->base.column = column;
482 doc->base.type = EOLIAN_OBJECT_DOCUMENTATION;
483
484 Eina_Strbuf *rbuf = eina_strbuf_new();
485
486 Eina_Bool term = EINA_FALSE, since = EINA_FALSE;
487 while (!term)
488 {
489 int read;
490 if (since)
491 {
492 read = read_since(ls);
493 term = EINA_TRUE;
494 }
495 else
496 read = doc_lex(ls, doc, &term, &since);
497 switch (read)
498 {
499 case DOC_MANGLED:
500 doc_error(ls, "mangled documentation", doc, rbuf);
501 return;
502 case DOC_UNFINISHED:
503 doc_error(ls, "unfinished documentation", doc, rbuf);
504 return;
505 case DOC_TEXT:
506 if (!eina_strbuf_length_get(ls->buff))
507 continue;
508 if (!doc->summary)
509 doc->summary = eina_stringshare_add(eina_strbuf_string_get(ls->buff));
510 else
511 {
512 if (eina_strbuf_length_get(rbuf))
513 eina_strbuf_append(rbuf, "\n\n");
514 eina_strbuf_append(rbuf, eina_strbuf_string_get(ls->buff));
515 }
516 break;
517 case DOC_SINCE:
518 doc->since = eina_stringshare_add(eina_strbuf_string_get(ls->buff));
519 break;
520 }
521 }
522
523 if (eina_strbuf_length_get(rbuf))
524 doc->description = eina_stringshare_add(eina_strbuf_string_get(rbuf));
525 if (!doc->since && ls->klass && ls->klass->doc)
526 doc->since = eina_stringshare_ref(ls->klass->doc->since);
527 eina_strbuf_free(rbuf);
528 tok->value.doc = doc;
529 }
530
531 static void
esc_error(Eo_Lexer * ls,int * c,int n,const char * msg)532 esc_error(Eo_Lexer *ls, int *c, int n, const char *msg)
533 {
534 int i;
535 eina_strbuf_reset(ls->buff);
536 eina_strbuf_append_char(ls->buff, '\\');
537 for (i = 0; i < n && c[i]; ++i)
538 eina_strbuf_append_char(ls->buff, c[i]);
539 eo_lexer_lex_error(ls, msg, TOK_STRING);
540 }
541
/*
 * Numeric value of a hexadecimal digit character.
 *
 * The argument is not validated; callers pass characters that have already
 * been accepted by isxdigit().
 */
static int
hex_val(int c)
{
   int base;

   if (c >= 'a')
     base = 'a' - 10;   /* 'a'..'f' -> 10..15 */
   else if (c >= 'A')
     base = 'A' - 10;   /* 'A'..'F' -> 10..15 */
   else
     base = '0';        /* '0'..'9' -> 0..9 */

   return c - base;
}
549
550 static int
read_hex_esc(Eo_Lexer * ls)551 read_hex_esc(Eo_Lexer *ls)
552 {
553 int c[3] = { 'x' };
554 int i, r = 0;
555 for (i = 1; i < 3; ++i)
556 {
557 next_char(ls);
558 c[i] = ls->current;
559 if (!isxdigit(c[i]))
560 esc_error(ls, c, i + 1, "hexadecimal digit expected");
561 r = (r << 4) + hex_val(c[i]);
562 }
563 return r;
564 }
565
566 static int
read_dec_esc(Eo_Lexer * ls)567 read_dec_esc(Eo_Lexer *ls)
568 {
569 int c[3];
570 int i, r = 0;
571 for (i = 0; i < 3 && isdigit(ls->current); ++i)
572 {
573 c[i] = ls->current;
574 r = r * 10 + (c[i] - '0');
575 next_char(ls);
576 }
577 if (r > UCHAR_MAX)
578 esc_error(ls, c, i, "decimal escape too large");
579 return r;
580 }
581
582 static void
read_escape(Eo_Lexer * ls)583 read_escape(Eo_Lexer *ls)
584 {
585 switch (ls->current)
586 {
587 case 'a': eina_strbuf_append_char(ls->buff, '\a'); next_char(ls); break;
588 case 'b': eina_strbuf_append_char(ls->buff, '\b'); next_char(ls); break;
589 case 'f': eina_strbuf_append_char(ls->buff, '\f'); next_char(ls); break;
590 case 'n': eina_strbuf_append_char(ls->buff, '\n'); next_char(ls); break;
591 case 'r': eina_strbuf_append_char(ls->buff, '\r'); next_char(ls); break;
592 case 't': eina_strbuf_append_char(ls->buff, '\t'); next_char(ls); break;
593 case 'v': eina_strbuf_append_char(ls->buff, '\v'); next_char(ls); break;
594 case 'x':
595 eina_strbuf_append_char(ls->buff, read_hex_esc(ls));
596 next_char(ls);
597 break;
598 case '\n': case '\r':
599 next_line(ls);
600 eina_strbuf_append_char(ls->buff, '\n');
601 break;
602 case '\\': case '"': case '\'':
603 eina_strbuf_append_char(ls->buff, ls->current);
604 break;
605 case '\0':
606 break;
607 default:
608 if (!isdigit(ls->current))
609 esc_error(ls, &ls->current, 1, "invalid escape sequence");
610 eina_strbuf_append_char(ls->buff, read_dec_esc(ls));
611 break;
612 }
613 }
614
615 static void
read_string(Eo_Lexer * ls,Eo_Token * tok)616 read_string(Eo_Lexer *ls, Eo_Token *tok)
617 {
618 eina_strbuf_reset(ls->buff);
619 eina_strbuf_append_char(ls->buff, '"');
620 next_char(ls);
621 while (ls->current != '"') switch (ls->current)
622 {
623 case '\0':
624 eo_lexer_lex_error(ls, "unfinished string", -1);
625 break;
626 case '\n': case '\r':
627 eo_lexer_lex_error(ls, "unfinished string", TOK_STRING);
628 break;
629 case '\\':
630 {
631 next_char(ls);
632 read_escape(ls);
633 break;
634 }
635 default:
636 eina_strbuf_append_char(ls->buff, ls->current);
637 next_char(ls);
638 }
639 eina_strbuf_append_char(ls->buff, ls->current);
640 next_char(ls);
641 tok->value.s = eina_stringshare_add_length(eina_strbuf_string_get(ls->buff) + 1,
642 (unsigned int)eina_strbuf_length_get(ls->buff) - 2);
643 }
644
645 static int
get_type(Eo_Lexer * ls,Eina_Bool is_float)646 get_type(Eo_Lexer *ls, Eina_Bool is_float)
647 {
648 if (is_float)
649 {
650 if (ls->current == 'f' || ls->current == 'F')
651 {
652 next_char(ls);
653 return NUM_FLOAT;
654 }
655 return NUM_DOUBLE;
656 }
657 if (ls->current == 'u' || ls->current == 'U')
658 {
659 next_char(ls);
660 if (ls->current == 'l' || ls->current == 'L')
661 {
662 next_char(ls);
663 if (ls->current == 'l' || ls->current == 'L')
664 {
665 next_char(ls);
666 return NUM_ULLONG;
667 }
668 return NUM_ULONG;
669 }
670 return NUM_UINT;
671 }
672 if (ls->current == 'l' || ls->current == 'L')
673 {
674 next_char(ls);
675 if (ls->current == 'l' || ls->current == 'L')
676 {
677 next_char(ls);
678 return NUM_LLONG;
679 }
680 return NUM_LONG;
681 }
682 return NUM_INT;
683 }
684
685 static void
replace_decpoint(Eo_Lexer * ls,char prevdecp)686 replace_decpoint(Eo_Lexer *ls, char prevdecp)
687 {
688 if (ls->decpoint == prevdecp) return;
689 char *bufs = eina_strbuf_string_steal(ls->buff);
690 char *p = bufs;
691 while ((p = strchr(p, prevdecp))) *p = ls->decpoint;
692 eina_strbuf_append(ls->buff, bufs);
693 free(bufs);
694 }
695
696 static void
write_val_with_decpoint(Eo_Lexer * ls,Eo_Token * tok,int type)697 write_val_with_decpoint(Eo_Lexer *ls, Eo_Token *tok, int type)
698 {
699 struct lconv *lc = localeconv();
700 char prev = ls->decpoint;
701 ls->decpoint = lc ? lc->decimal_point[0] : '.';
702 if (ls->decpoint == prev)
703 {
704 eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
705 return;
706 }
707 replace_decpoint(ls, prev);
708 char *end = NULL;
709 if (type == NUM_FLOAT)
710 tok->value.f = strtof(eina_strbuf_string_get(ls->buff), &end);
711 else if (type == NUM_DOUBLE)
712 tok->value.d = strtod(eina_strbuf_string_get(ls->buff), &end);
713 if (end && end[0])
714 eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
715 tok->kw = type;
716 }
717
718 static void
write_val(Eo_Lexer * ls,Eo_Token * tok,Eina_Bool is_float)719 write_val(Eo_Lexer *ls, Eo_Token *tok, Eina_Bool is_float)
720 {
721 int type = get_type(ls, is_float);
722 char *end = NULL;
723 if (is_float)
724 {
725 replace_decpoint(ls, '.');
726 if (type == NUM_FLOAT)
727 tok->value.f = strtof(eina_strbuf_string_get(ls->buff), &end);
728 else if (type == NUM_DOUBLE)
729 tok->value.d = strtod(eina_strbuf_string_get(ls->buff), &end);
730 }
731 else
732 {
733 const char *str = eina_strbuf_string_get(ls->buff);
734 /* signed is always in the same memory location */
735 if (type == NUM_INT || type == NUM_UINT)
736 tok->value.u = strtoul(str, &end, 0);
737 else if (type == NUM_LONG || type == NUM_ULONG)
738 tok->value.ul = strtoul(str, &end, 0);
739 else if (type == NUM_LLONG || type == NUM_ULLONG)
740 tok->value.ull = strtoull(str, &end, 0);
741 }
742 if (end && end[0])
743 {
744 if (is_float)
745 {
746 write_val_with_decpoint(ls, tok, type);
747 return;
748 }
749 eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
750 }
751 tok->kw = type;
752 }
753
754 static void
write_exp(Eo_Lexer * ls)755 write_exp(Eo_Lexer *ls)
756 {
757 eina_strbuf_append_char(ls->buff, ls->current);
758 next_char(ls);
759 if (ls->current == '+' || ls->current == '-')
760 {
761 eina_strbuf_append_char(ls->buff, ls->current);
762 next_char(ls);
763 while (isdigit(ls->current))
764 {
765 eina_strbuf_append_char(ls->buff, ls->current);
766 next_char(ls);
767 }
768 }
769 }
770
771 static void
read_hex_number(Eo_Lexer * ls,Eo_Token * tok)772 read_hex_number(Eo_Lexer *ls, Eo_Token *tok)
773 {
774 Eina_Bool is_float = EINA_FALSE;
775 while (isxdigit(ls->current) || ls->current == '.')
776 {
777 eina_strbuf_append_char(ls->buff, ls->current);
778 if (ls->current == '.') is_float = EINA_TRUE;
779 next_char(ls);
780 }
781 if (is_float && (ls->current != 'p' && ls->current != 'P'))
782 {
783 eo_lexer_lex_error(ls, "hex float literals require an exponent",
784 TOK_NUMBER);
785 }
786 if (ls->current == 'p' || ls->current == 'P')
787 {
788 is_float = EINA_TRUE;
789 write_exp(ls);
790 }
791 write_val(ls, tok, is_float);
792 }
793
794 static void
read_number(Eo_Lexer * ls,Eo_Token * tok)795 read_number(Eo_Lexer *ls, Eo_Token *tok)
796 {
797 Eina_Bool is_float = eina_strbuf_string_get(ls->buff)[0] == '.';
798 if (ls->current == '0' && !is_float)
799 {
800 eina_strbuf_append_char(ls->buff, ls->current);
801 next_char(ls);
802 if (ls->current == 'x' || ls->current == 'X')
803 {
804 eina_strbuf_append_char(ls->buff, ls->current);
805 next_char(ls);
806 read_hex_number(ls, tok);
807 return;
808 }
809 }
810 while (isdigit(ls->current) || ls->current == '.')
811 {
812 eina_strbuf_append_char(ls->buff, ls->current);
813 if (ls->current == '.') is_float = EINA_TRUE;
814 next_char(ls);
815 }
816 if (ls->current == 'e' || ls->current == 'E')
817 {
818 is_float = EINA_TRUE;
819 write_exp(ls);
820 }
821 write_val(ls, tok, is_float);
822 }
823
824 static int
lex(Eo_Lexer * ls,Eo_Token * tok)825 lex(Eo_Lexer *ls, Eo_Token *tok)
826 {
827 eina_strbuf_reset(ls->buff);
828 tok->value.s = NULL;
829 for (;;) switch (ls->current)
830 {
831 case '\n':
832 case '\r':
833 next_line(ls);
834 continue;
835 case '/':
836 {
837 next_char(ls);
838 if (ls->current == '*')
839 {
840 int ccol = ls->column;
841 next_char(ls);
842 if (ls->current == '@')
843 {
844 eo_lexer_lex_error(ls, "old style documentation comment", -1);
845 return -1; /* unreachable */
846 }
847 read_long_comment(ls, ccol);
848 continue;
849 }
850 else if (ls->current != '/') return '/';
851 next_char(ls);
852 while (ls->current && !is_newline(ls->current))
853 next_char(ls);
854 continue;
855 }
856 case '[':
857 {
858 int dline = ls->line_number, dcol = ls->column;
859 const char *sline = ls->stream_line;
860 next_char(ls);
861 if (ls->current != '[') return '[';
862 next_char(ls);
863 read_doc(ls, tok, dline, dcol);
864 ls->column = dcol + 1;
865 /* doc is the only potentially multiline token */
866 ls->line_number = dline;
867 ls->stream_line = sline;
868 return TOK_DOC;
869 }
870 case '\0':
871 return -1;
872 case '=':
873 next_char(ls);
874 if (!ls->expr_mode || (ls->current != '=')) return '=';
875 next_char(ls);
876 --ls->column;
877 return TOK_EQ;
878 case '!':
879 next_char(ls);
880 if (!ls->expr_mode || (ls->current != '=')) return '!';
881 next_char(ls);
882 --ls->column;
883 return TOK_NQ;
884 case '>':
885 next_char(ls);
886 if (!ls->expr_mode) return '>';
887 if (ls->current == '=')
888 {
889 next_char(ls);
890 --ls->column;
891 return TOK_GE;
892 }
893 else if (ls->current == '>')
894 {
895 next_char(ls);
896 --ls->column;
897 return TOK_RSH;
898 }
899 return '>';
900 case '<':
901 next_char(ls);
902 if (!ls->expr_mode) return '<';
903 if (ls->current == '=')
904 {
905 next_char(ls);
906 --ls->column;
907 return TOK_LE;
908 }
909 else if (ls->current == '<')
910 {
911 next_char(ls);
912 --ls->column;
913 return TOK_LSH;
914 }
915 return '<';
916 case '&':
917 next_char(ls);
918 if (!ls->expr_mode || (ls->current != '&')) return '&';
919 next_char(ls);
920 --ls->column;
921 return TOK_AND;
922 case '|':
923 next_char(ls);
924 if (!ls->expr_mode || (ls->current != '|')) return '|';
925 next_char(ls);
926 --ls->column;
927 return TOK_OR;
928 case '"':
929 {
930 int dcol = ls->column;
931 if (!ls->expr_mode)
932 {
933 next_char(ls);
934 return '"';
935 }
936 /* strings are not multiline for now at least */
937 read_string(ls, tok);
938 ls->column = dcol + 1;
939 return TOK_STRING;
940 }
941 case '\'':
942 {
943 int dcol = ls->column;
944 next_char(ls);
945 if (!ls->expr_mode) return '\'';
946 if (ls->current == '\\')
947 {
948 next_char(ls);
949 eina_strbuf_reset(ls->buff);
950 read_escape(ls);
951 tok->value.c = (char)*eina_strbuf_string_get(ls->buff);
952 }
953 else
954 {
955 tok->value.c = ls->current;
956 next_char(ls);
957 }
958 if (ls->current != '\'')
959 eo_lexer_lex_error(ls, "unfinished character", TOK_CHAR);
960 next_char(ls);
961 ls->column = dcol + 1;
962 return TOK_CHAR;
963 }
964 case '.':
965 {
966 int dcol = ls->column;
967 next_char(ls);
968 if (!isdigit(ls->current)) return '.';
969 eina_strbuf_reset(ls->buff);
970 eina_strbuf_append_char(ls->buff, '.');
971 read_number(ls, tok);
972 ls->column = dcol + 1;
973 return TOK_NUMBER;
974 }
975 default:
976 {
977 if (isspace(ls->current))
978 {
979 assert(!is_newline(ls->current));
980 next_char(ls);
981 continue;
982 }
983 else if (isdigit(ls->current))
984 {
985 int col = ls->column;
986 eina_strbuf_reset(ls->buff);
987 read_number(ls, tok);
988 ls->column = col + 1;
989 return TOK_NUMBER;
990 }
991 if (ls->current && (isalnum(ls->current)
992 || ls->current == '@' || ls->current == '#' || ls->current == '_'))
993 {
994 int col = ls->column;
995 Eina_Bool pfx_kw = (ls->current == '@') || (ls->current == '#');
996 const char *str;
997 eina_strbuf_reset(ls->buff);
998 do
999 {
1000 eina_strbuf_append_char(ls->buff, ls->current);
1001 next_char(ls);
1002 }
1003 while (ls->current && (isalnum(ls->current)
1004 || ls->current == '_'));
1005 str = eina_strbuf_string_get(ls->buff);
1006 tok->kw = (int)(uintptr_t)eina_hash_find(keyword_map,
1007 str);
1008 ls->column = col + 1;
1009 tok->value.s = eina_stringshare_add(str);
1010 if (pfx_kw && tok->kw == 0)
1011 eo_lexer_syntax_error(ls, "invalid keyword");
1012 return TOK_VALUE;
1013 }
1014 else
1015 {
1016 int c = ls->current;
1017 next_char(ls);
1018 return c;
1019 }
1020 }
1021 }
1022 }
1023
1024 static const char *
get_filename(Eo_Lexer * ls)1025 get_filename(Eo_Lexer *ls)
1026 {
1027 const char *fslash = strrchr(ls->source, '/');
1028 const char *bslash = strrchr(ls->source, '\\');
1029 if (fslash || bslash)
1030 return eina_stringshare_add((fslash > bslash) ? (fslash + 1) : (bslash + 1));
1031 return eina_stringshare_ref(ls->source);
1032 }
1033
1034 static void
_node_free(Eolian_Object * obj)1035 _node_free(Eolian_Object *obj)
1036 {
1037 #if 0
1038 /* for when we have a proper node allocator and collect on shutdown */
1039 if (obj->refcount > 1)
1040 {
1041 eolian_state_log(obj->state, "node %p (type %d, name %s at %s:%d:%d)"
1042 " dangling ref (count: %d)", obj, obj->type, obj->name,
1043 obj->file, obj->line, obj->column, obj->refcount);
1044 }
1045 #endif
1046 switch (obj->type)
1047 {
1048 case EOLIAN_OBJECT_CLASS:
1049 database_class_del((Eolian_Class *)obj);
1050 break;
1051 case EOLIAN_OBJECT_TYPEDECL:
1052 database_typedecl_del((Eolian_Typedecl *)obj);
1053 break;
1054 case EOLIAN_OBJECT_TYPE:
1055 database_type_del((Eolian_Type *)obj);
1056 break;
1057 case EOLIAN_OBJECT_CONSTANT:
1058 database_constant_del((Eolian_Constant *)obj);
1059 break;
1060 case EOLIAN_OBJECT_EXPRESSION:
1061 database_expr_del((Eolian_Expression *)obj);
1062 break;
1063 default:
1064 /* normally unreachable, just for debug */
1065 assert(0);
1066 break;
1067 }
1068 }
1069
1070 static void
eo_lexer_set_input(Eo_Lexer * ls,Eolian_State * state,const char * source)1071 eo_lexer_set_input(Eo_Lexer *ls, Eolian_State *state, const char *source)
1072 {
1073 Eina_File *f = eina_file_open(source, EINA_FALSE);
1074 if (!f)
1075 {
1076 eolian_state_log(state, "%s", strerror(errno));
1077 longjmp(ls->err_jmp, EO_LEXER_ERROR_NORMAL);
1078 }
1079 ls->lookahead.token = -1;
1080 ls->state = state;
1081 ls->buff = eina_strbuf_new();
1082 ls->handle = f;
1083 ls->stream = eina_file_map_all(f, EINA_FILE_RANDOM);
1084 ls->stream_end = ls->stream + eina_file_size_get(f);
1085 ls->stream_line = ls->stream;
1086 ls->source = eina_stringshare_add(source);
1087 ls->filename = get_filename(ls);
1088 ls->iline_number = ls->line_number = 1;
1089 ls->icolumn = ls->column = -1;
1090 ls->decpoint = '.';
1091 ls->nodes = eina_hash_pointer_new(EINA_FREE_CB(_node_free));
1092 next_char(ls);
1093
1094 Eolian_Unit *ncunit = calloc(1, sizeof(Eolian_Unit));
1095 if (!ncunit)
1096 {
1097 eo_lexer_free(ls);
1098 eolian_state_panic(state, "out of memory");
1099 }
1100 ls->unit = ncunit;
1101 database_unit_init(state, ncunit, ls->filename);
1102 eina_hash_add(state->staging.units, ls->filename, ncunit);
1103
1104 if (ls->current != 0xEF)
1105 return;
1106 next_char(ls);
1107 if (ls->current != 0xBB)
1108 return;
1109 next_char(ls);
1110 if (ls->current != 0xBF)
1111 return;
1112 next_char(ls);
1113 }
1114
1115 Eolian_Object *
eo_lexer_node_new(Eo_Lexer * ls,size_t objsize)1116 eo_lexer_node_new(Eo_Lexer *ls, size_t objsize)
1117 {
1118 Eolian_Object *obj = calloc(1, objsize);
1119 if (!obj)
1120 longjmp(ls->err_jmp, EO_LEXER_ERROR_OOM);
1121 eina_hash_add(ls->nodes, &obj, obj);
1122 eolian_object_ref(obj);
1123 return obj;
1124 }
1125
1126 Eolian_Object *
eo_lexer_node_release(Eo_Lexer * ls,Eolian_Object * obj)1127 eo_lexer_node_release(Eo_Lexer *ls, Eolian_Object *obj)
1128 {
1129 /* just for debug */
1130 assert(eina_hash_find(ls->nodes, &obj) && (obj->refcount >= 1));
1131 (void)eolian_object_unref(obj);
1132 eina_hash_set(ls->nodes, &obj, NULL);
1133 return obj;
1134 }
1135
1136 static void
_free_tok(Eo_Token * tok)1137 _free_tok(Eo_Token *tok)
1138 {
1139 if (tok->token < START_CUSTOM || tok->token == TOK_NUMBER ||
1140 tok->token == TOK_CHAR)
1141 return;
1142 if (tok->token == TOK_DOC)
1143 {
1144 /* free doc */
1145 if (!tok->value.doc) return;
1146 eina_stringshare_del(tok->value.doc->summary);
1147 eina_stringshare_del(tok->value.doc->description);
1148 free(tok->value.doc);
1149 tok->value.doc = NULL;
1150 return;
1151 }
1152 eina_stringshare_del(tok->value.s);
1153 tok->value.s = NULL;
1154 }
1155
1156 void
eo_lexer_dtor_push(Eo_Lexer * ls,Eina_Free_Cb free_cb,void * data)1157 eo_lexer_dtor_push(Eo_Lexer *ls, Eina_Free_Cb free_cb, void *data)
1158 {
1159 Eo_Lexer_Dtor *dt = malloc(sizeof(Eo_Lexer_Dtor));
1160 if (!dt)
1161 {
1162 free_cb(data);
1163 longjmp(ls->err_jmp, EO_LEXER_ERROR_OOM);
1164 }
1165 dt->free_cb = free_cb;
1166 dt->data = data;
1167 ls->dtors = eina_list_prepend(ls->dtors, dt);
1168 }
1169
1170 void
eo_lexer_dtor_pop(Eo_Lexer * ls)1171 eo_lexer_dtor_pop(Eo_Lexer *ls)
1172 {
1173 Eo_Lexer_Dtor *dt = eina_list_data_get(ls->dtors);
1174 ls->dtors = eina_list_remove_list(ls->dtors, ls->dtors);
1175 dt->free_cb(dt->data);
1176 free(dt);
1177 }
1178
1179 void
eo_lexer_free(Eo_Lexer * ls)1180 eo_lexer_free(Eo_Lexer *ls)
1181 {
1182 if (!ls) return;
1183 if (ls->source ) eina_stringshare_del(ls->source);
1184 if (ls->filename) eina_stringshare_del(ls->filename);
1185 if (ls->buff ) eina_strbuf_free (ls->buff);
1186 if (ls->handle ) eina_file_close (ls->handle);
1187
1188 _free_tok(&ls->t);
1189 eo_lexer_context_clear(ls);
1190
1191 Eo_Lexer_Dtor *dtor;
1192 EINA_LIST_FREE(ls->dtors, dtor)
1193 dtor->free_cb(dtor->data);
1194
1195 eina_hash_free(ls->nodes);
1196
1197 free(ls);
1198 }
1199
1200 Eo_Lexer *
eo_lexer_new(Eolian_State * state,const char * source)1201 eo_lexer_new(Eolian_State *state, const char *source)
1202 {
1203 volatile Eo_Lexer *ls = calloc(1, sizeof(Eo_Lexer));
1204 if (!ls)
1205 eolian_state_panic(state, "out of memory");
1206
1207 if (!setjmp(((Eo_Lexer *)(ls))->err_jmp))
1208 {
1209 eo_lexer_set_input((Eo_Lexer *) ls, state, source);
1210 return (Eo_Lexer *) ls;
1211 }
1212 eo_lexer_free((Eo_Lexer *) ls);
1213 return NULL;
1214 }
1215
1216 int
eo_lexer_get(Eo_Lexer * ls)1217 eo_lexer_get(Eo_Lexer *ls)
1218 {
1219 _free_tok(&ls->t);
1220 if (ls->lookahead.token >= 0)
1221 {
1222 ls->t = ls->lookahead;
1223 ls->lookahead.token = -1;
1224 return ls->t.token;
1225 }
1226 ls->t.kw = 0;
1227 return (ls->t.token = lex(ls, &ls->t));
1228 }
1229
1230 int
eo_lexer_lookahead(Eo_Lexer * ls)1231 eo_lexer_lookahead(Eo_Lexer *ls)
1232 {
1233 assert (ls->lookahead.token < 0);
1234 ls->lookahead.kw = 0;
1235 eo_lexer_context_push(ls);
1236 ls->lookahead.token = lex(ls, &ls->lookahead);
1237 eo_lexer_context_restore(ls);
1238 eo_lexer_context_pop(ls);
1239 return ls->lookahead.token;
1240 }
1241
1242 void
eo_lexer_lex_error(Eo_Lexer * ls,const char * msg,int token)1243 eo_lexer_lex_error(Eo_Lexer *ls, const char *msg, int token)
1244 {
1245 if (token)
1246 {
1247 char buf[256];
1248 txt_token(ls, token, buf);
1249 throw(ls, "%s near '%s'", msg, buf);
1250 }
1251 else
1252 throw(ls, "%s", msg);
1253 }
1254
1255 void
eo_lexer_syntax_error(Eo_Lexer * ls,const char * msg)1256 eo_lexer_syntax_error(Eo_Lexer *ls, const char *msg)
1257 {
1258 eo_lexer_lex_error(ls, msg, ls->t.token);
1259 }
1260
1261 void
eo_lexer_token_to_str(int token,char * buf)1262 eo_lexer_token_to_str(int token, char *buf)
1263 {
1264 if (token < 0)
1265 {
1266 memcpy(buf, "<eof>", 6);
1267 }
1268 else if (token < START_CUSTOM)
1269 {
1270 assert((unsigned char)token == token);
1271 if (iscntrl(token))
1272 sprintf(buf, "char(%d)", token);
1273 else
1274 sprintf(buf, "%c", token);
1275 }
1276 else
1277 {
1278 const char *v;
1279 size_t idx = token - START_CUSTOM;
1280 size_t tsz = sizeof(tokens) / sizeof(tokens[0]);
1281 if (idx >= tsz)
1282 v = keywords[idx - tsz];
1283 else
1284 v = tokens[idx];
1285 memcpy(buf, v, strlen(v) + 1);
1286 }
1287 }
1288
1289 const char *
eo_lexer_keyword_str_get(int kw)1290 eo_lexer_keyword_str_get(int kw)
1291 {
1292 return keywords[kw - 1];
1293 }
1294
1295 Eina_Bool
eo_lexer_is_type_keyword(int kw)1296 eo_lexer_is_type_keyword(int kw)
1297 {
1298 return (kw >= KW_byte && kw < KW_true);
1299 }
1300
1301 int
eo_lexer_keyword_str_to_id(const char * kw)1302 eo_lexer_keyword_str_to_id(const char *kw)
1303 {
1304 return (int)(uintptr_t)eina_hash_find(keyword_map, kw);
1305 }
1306
1307 const char *
eo_lexer_get_c_type(int kw)1308 eo_lexer_get_c_type(int kw)
1309 {
1310 if (!eo_lexer_is_type_keyword(kw)) return NULL;
1311 return ctypes[kw - KW_byte];
1312 }
1313
1314 static Eina_Bool
_eo_is_tokstr(int t)1315 _eo_is_tokstr(int t) {
1316 return (t == TOK_STRING) || (t == TOK_VALUE);
1317 }
1318
1319 void
eo_lexer_context_push(Eo_Lexer * ls)1320 eo_lexer_context_push(Eo_Lexer *ls)
1321 {
1322 Lexer_Ctx *ctx = malloc(sizeof(Lexer_Ctx));
1323 if (!ctx)
1324 longjmp(ls->err_jmp, EO_LEXER_ERROR_OOM);
1325 ctx->line = ls->line_number;
1326 ctx->column = ls->column;
1327 ctx->linestr = ls->stream_line;
1328 ctx->token = ls->t;
1329 if (_eo_is_tokstr(ctx->token.token))
1330 eina_stringshare_ref(ctx->token.value.s);
1331 ls->saved_ctxs = eina_list_prepend(ls->saved_ctxs, ctx);
1332 }
1333
1334 void
eo_lexer_context_pop(Eo_Lexer * ls)1335 eo_lexer_context_pop(Eo_Lexer *ls)
1336 {
1337 Lexer_Ctx *ctx = (Lexer_Ctx*)eina_list_data_get(ls->saved_ctxs);
1338 if (_eo_is_tokstr(ctx->token.token))
1339 eina_stringshare_del(ctx->token.value.s);
1340 free(ctx);
1341 ls->saved_ctxs = eina_list_remove_list(ls->saved_ctxs, ls->saved_ctxs);
1342 }
1343
1344 void
eo_lexer_context_restore(Eo_Lexer * ls)1345 eo_lexer_context_restore(Eo_Lexer *ls)
1346 {
1347 if (!eina_list_count(ls->saved_ctxs)) return;
1348 Lexer_Ctx *ctx = (Lexer_Ctx*)eina_list_data_get(ls->saved_ctxs);
1349 ls->line_number = ctx->line;
1350 ls->column = ctx->column;
1351 ls->stream_line = ctx->linestr;
1352 if (_eo_is_tokstr(ls->t.token))
1353 eina_stringshare_del(ls->t.value.s);
1354 ls->t = ctx->token;
1355 if (_eo_is_tokstr(ls->t.token))
1356 eina_stringshare_ref(ls->t.value.s);
1357 }
1358
1359 void
eo_lexer_context_clear(Eo_Lexer * ls)1360 eo_lexer_context_clear(Eo_Lexer *ls)
1361 {
1362 Lexer_Ctx *ctx;
1363 EINA_LIST_FREE(ls->saved_ctxs, ctx) free(ctx);
1364 }
1365