1 #ifdef HAVE_CONFIG_H
2 # include "config.h"
3 #endif
4 
5 #include <stdio.h>
6 #include <ctype.h>
7 #include <locale.h>
8 
9 #include <setjmp.h>
10 #include <assert.h>
11 
12 #include "eo_lexer.h"
13 #include "eolian_priv.h"
14 
15 static int lastbytes = 0;
16 
17 static void
next_char(Eo_Lexer * ls)18 next_char(Eo_Lexer *ls)
19 {
20    int nb;
21    Eina_Bool end = EINA_FALSE;
22 
23    if (ls->stream == ls->stream_end)
24      {
25         end = EINA_TRUE;
26         ls->current = '\0';
27      }
28    else
29      ls->current = *(ls->stream++);
30 
31    nb = lastbytes;
32    if (!nb && end) nb = 1;
33    if (!nb) eina_unicode_utf8_next_get(ls->stream - 1, &nb);
34 
35    if (nb == 1)
36      {
37         nb = 0;
38         ++ls->icolumn;
39         ls->column = ls->icolumn;
40      }
41    else --nb;
42 
43    lastbytes = nb;
44 }
45 
/* Stringification helpers: plain word, '@'-prefixed word and '#'-prefixed
 * word. Presumably consumed by the KEYWORDS list (defined outside this
 * chunk) that initializes keywords[] below.
 */
#define KW(x) #x
#define KWAT(x) "@" #x
#define KWH(x) "#" #x

/* Printable spellings of the multi-character / custom tokens. */
static const char * const tokens[] =
{
   "==", "!=", ">=", "<=", "&&", "||", "<<", ">>",
   "<doc>", "<string>", "<char>", "<number>", "<value>"
};

/* Keyword spellings; eo_lexer_init() maps keywords[i] to the id i + 1. */
static const char * const keywords[] = { KEYWORDS };

/* C type spellings. NOTE(review): the ordering presumably mirrors a keyword
 * range declared elsewhere - confirm before reordering entries.
 */
static const char * const ctypes[] =
{
   "signed char", "unsigned char", "char", "short", "unsigned short", "int",
   "unsigned int", "long", "unsigned long", "long long", "unsigned long long",

   "int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t",
   "int64_t", "uint64_t", "int128_t", "uint128_t",

   "size_t", "ssize_t", "intptr_t", "uintptr_t", "ptrdiff_t",

   "time_t",

   "float", "double",

   "Eina_Bool",

   "Eina_Slice", "Eina_Rw_Slice",

   "void",

   "Eina_Accessor *", "Eina_Array *", "Eina_Future *", "Eina_Iterator *",
   "Eina_List *",
   "Eina_Value", "Eina_Value *", "Eina_Binbuf *", "Efl_Event *",
   "char *", "const char *", "Eina_Stringshare *", "Eina_Strbuf *",

   "Eina_Hash *",
   "void *",

   "function",
};

/* the helpers are only needed while the tables above are expanded */
#undef KW
#undef KWAT
#undef KWH

/* true for either line terminator byte; evaluates its argument twice */
#define is_newline(c) ((c) == '\n' || (c) == '\r')

/* keyword string -> (index + 1); created by eo_lexer_init() and released
 * by eo_lexer_shutdown()
 */
static Eina_Hash *keyword_map = NULL;
96 
97 static void
throw(Eo_Lexer * ls,const char * fmt,...)98 throw(Eo_Lexer *ls, const char *fmt, ...)
99 {
100    const char *ln = ls->stream_line, *end = ls->stream_end;
101    Eina_Strbuf *buf = eina_strbuf_new();
102    int i;
103    va_list ap;
104    va_start(ap, fmt);
105    eina_strbuf_append_vprintf(buf, fmt, ap);
106    va_end(ap);
107    eina_strbuf_append(buf, "\n ");
108    while (ln != end && !is_newline(*ln))
109      eina_strbuf_append_char(buf,*(ln++));
110    eina_strbuf_append_char(buf, '\n');
111    for (i = 0; i < ls->column; ++i)
112      eina_strbuf_append_char(buf, ' ');
113    eina_strbuf_append(buf, "^\n");
114    Eolian_Object tmp;
115    memset(&tmp, 0, sizeof(Eolian_Object));
116    tmp.unit = ls->unit;
117    tmp.file = ls->source;
118    tmp.line = ls->line_number;
119    tmp.column = ls->column;
120    eolian_state_log_obj(ls->state, &tmp, "%s", eina_strbuf_string_get(buf));
121    eina_strbuf_free(buf);
122    longjmp(ls->err_jmp, EO_LEXER_ERROR_NORMAL);
123 }
124 
125 void
eo_lexer_init(void)126 eo_lexer_init(void)
127 {
128    unsigned int i;
129    if (keyword_map) return;
130    keyword_map = eina_hash_string_superfast_new(NULL);
131    for (i = 0; i < (sizeof(keywords) / sizeof(keywords[0])); ++i)
132      eina_hash_add(keyword_map, keywords[i], (void *)(size_t)(i + 1));
133 }
134 
135 void
eo_lexer_shutdown(void)136 eo_lexer_shutdown(void)
137 {
138    if (keyword_map)
139      {
140         eina_hash_free(keyword_map);
141         keyword_map = NULL;
142      }
143 }
144 
145 static void
txt_token(Eo_Lexer * ls,int token,char * buf)146 txt_token(Eo_Lexer *ls, int token, char *buf)
147 {
148    if (token == TOK_VALUE)
149      memcpy(buf, ls->t.value.s, strlen(ls->t.value.s) + 1);
150    else
151      return eo_lexer_token_to_str(token, buf);
152 }
153 
154 void eo_lexer_lex_error   (Eo_Lexer *ls, const char *msg, int token);
155 void eo_lexer_syntax_error(Eo_Lexer *ls, const char *msg);
156 
next_line(Eo_Lexer * ls)157 static void next_line(Eo_Lexer *ls)
158 {
159    int old = ls->current;
160    assert(is_newline(ls->current));
161    ls->stream_line = ls->stream;
162    next_char(ls);
163    if (is_newline(ls->current) && ls->current != old)
164      {
165        next_char(ls);
166        ls->stream_line = ls->stream;
167      }
168    if (++ls->iline_number >= INT_MAX)
169      eo_lexer_syntax_error(ls, "chunk has too many lines");
170    ls->line_number = ls->iline_number;
171    ls->icolumn = ls->column = 0;
172 }
173 
/*
 * Skip whitespace on the current line; stops at a newline or at the first
 * non-space character.
 */
static void skip_ws(Eo_Lexer *ls)
{
   /* ls->current may hold a negative value for bytes >= 0x80, which the
    * <ctype.h> functions do not accept - convert to unsigned char first
    */
   while (isspace((unsigned char)ls->current) && !is_newline(ls->current))
     next_char(ls);
}
179 
180 /* go to next line and strip leading whitespace */
next_line_ws(Eo_Lexer * ls)181 static void next_line_ws(Eo_Lexer *ls)
182 {
183    next_line(ls);
184    skip_ws(ls);
185 }
186 
187 static Eina_Bool
should_skip_star(Eo_Lexer * ls,int ccol,Eina_Bool * term)188 should_skip_star(Eo_Lexer *ls, int ccol, Eina_Bool *term)
189 {
190    Eina_Bool had_star = EINA_FALSE;
191    if (ls->column == ccol && ls->current == '*')
192      {
193         had_star = EINA_TRUE;
194         next_char(ls);
195         if (ls->current == '/')
196           {
197              next_char(ls);
198              *term = EINA_TRUE;
199              return EINA_FALSE;
200           }
201         skip_ws(ls);
202      }
203    return had_star;
204 }
205 
206 static void
read_long_comment(Eo_Lexer * ls,int ccol)207 read_long_comment(Eo_Lexer *ls, int ccol)
208 {
209    Eina_Bool had_star = EINA_FALSE, had_nl = EINA_FALSE;
210    eina_strbuf_reset(ls->buff);
211 
212    if (is_newline(ls->current))
213      {
214         Eina_Bool term = EINA_FALSE;
215         had_nl = EINA_TRUE;
216         next_line_ws(ls);
217         had_star = should_skip_star(ls, ccol, &term);
218         if (term) goto cend;
219      }
220 
221    for (;;)
222      {
223         if (!ls->current)
224           eo_lexer_lex_error(ls, "unfinished long comment", -1);
225         if (ls->current == '*')
226           {
227              next_char(ls);
228              if (ls->current == '/')
229                {
230                   next_char(ls);
231                   break;
232                }
233              eina_strbuf_append_char(ls->buff, '*');
234           }
235         else if (is_newline(ls->current))
236           {
237              eina_strbuf_append_char(ls->buff, '\n');
238              next_line_ws(ls);
239              if (!had_nl)
240                {
241                   Eina_Bool term = EINA_FALSE;
242                   had_nl = EINA_TRUE;
243                   had_star = should_skip_star(ls, ccol, &term);
244                   if (term) break;
245                }
246              else if (had_star && ls->column == ccol && ls->current == '*')
247                {
248                   next_char(ls);
249                   if (ls->current == '/')
250                     {
251                        next_char(ls);
252                        break;
253                     }
254                   skip_ws(ls);
255                 }
256           }
257         else
258           {
259              eina_strbuf_append_char(ls->buff, ls->current);
260              next_char(ls);
261           }
262      }
263 cend:
264    eina_strbuf_trim(ls->buff);
265 }
266 
/* Results of the documentation sub-lexer (doc_lex() / read_since()):
 *  DOC_MANGLED    - reported by read_doc() as "mangled documentation"
 *  DOC_UNFINISHED - reported by read_doc() as "unfinished documentation"
 *  DOC_TEXT       - a chunk of text is available in ls->buff
 *  DOC_SINCE      - the value following "@since" is available in ls->buff
 */
enum Doc_Tokens {
    DOC_MANGLED = -2, DOC_UNFINISHED = -1, DOC_TEXT = 0, DOC_SINCE = 1
};
270 
271 static void
doc_ref_class(Eo_Lexer * ls,const char * cname)272 doc_ref_class(Eo_Lexer *ls, const char *cname)
273 {
274    size_t clen = strlen(cname);
275    char *buf = alloca(clen + 4);
276    memcpy(buf, cname, clen);
277    buf[clen] = '\0';
278    for (char *p = buf; *p; ++p)
279      {
280         if (*p == '.')
281           *p = '_';
282         else
283           *p = tolower(*p);
284      }
285    memcpy(buf + clen, ".eo", sizeof(".eo"));
286    if (!eina_hash_find(ls->state->filenames_eo, buf))
287      return;
288    /* ref'd classes do not become dependencies */
289    database_defer(ls->state, buf, EINA_FALSE);
290 }
291 
292 static void
doc_ref(Eo_Lexer * ls,Eolian_Documentation * doc)293 doc_ref(Eo_Lexer *ls, Eolian_Documentation *doc)
294 {
295    const char *st = ls->stream, *ste = ls->stream_end;
296    size_t rlen = 0;
297    while ((st != ste) && ((*st == '.') || (*st == '_') || isalnum(*st)))
298      {
299         ++st;
300         ++rlen;
301      }
302    if ((rlen > 1) && (*(st - 1) == '.'))
303      --rlen;
304    if (!rlen)
305      return;
306    if (*ls->stream == '.')
307      return;
308 
309    char *buf = alloca(rlen + 1);
310    memcpy(buf, ls->stream, rlen);
311    buf[rlen] = '\0';
312 
313    /* actual full class name */
314    doc_ref_class(ls, buf);
315 
316    /* it's definitely a reference, add debug info
317     * 20 bits for line and 12 bits for column, good enough
318     */
319    doc->ref_dbg = eina_list_append(doc->ref_dbg,
320      (void *)(size_t)((ls->line_number & 0xFFFFF) | (((ls->column + 1) & 0xFFF) << 20)));
321 
322    /* method name at the end */
323    char *end = strrchr(buf, '.');
324    if (!end)
325      return;
326    *end = '\0';
327    doc_ref_class(ls, buf);
328 
329    /* .get or .set at the end, handle possible property */
330    if (strcmp(end + 1, "get") && strcmp(end + 1, "set"))
331      return;
332    end = strrchr(buf, '.');
333    if (!end)
334      return;
335    *end = '\0';
336    doc_ref_class(ls, buf);
337 }
338 
339 static int
doc_lex(Eo_Lexer * ls,Eolian_Documentation * doc,Eina_Bool * term,Eina_Bool * since)340 doc_lex(Eo_Lexer *ls, Eolian_Documentation *doc, Eina_Bool *term, Eina_Bool *since)
341 {
342    int tokret = -1;
343    eina_strbuf_reset(ls->buff);
344    *since = EINA_FALSE;
345    for (;;) switch (ls->current)
346      {
347       /* error case */
348       case '\0':
349         return DOC_UNFINISHED;
350       /* newline case: if two or more newlines are present, new paragraph
351        * if only one newline is present, append space to the text buffer
352        * when starting new paragraph, reset doc continutation
353        */
354       case '\n':
355       case '\r':
356         next_line(ls);
357         skip_ws(ls);
358         if (!is_newline(ls->current))
359           {
360              eina_strbuf_append_char(ls->buff, ' ');
361              continue;
362           }
363         while (is_newline(ls->current))
364           next_line_ws(ls);
365         tokret = DOC_TEXT;
366         goto exit_with_token;
367       /* escape case: for any \X, output \X
368        * except for \\]], then output just ]]
369        */
370       case '\\':
371         next_char(ls);
372         if (ls->current == ']')
373           {
374              next_char(ls);
375              if (ls->current == ']')
376                {
377                   next_char(ls);
378                   eina_strbuf_append(ls->buff, "]]");
379                }
380              else
381                eina_strbuf_append(ls->buff, "\\]");
382           }
383         else
384           eina_strbuf_append_char(ls->buff, '\\');
385         continue;
386       /* terminating case */
387       case ']':
388         next_char(ls);
389         if (ls->current == ']')
390           {
391              /* terminate doc */
392              tokret = DOC_TEXT;
393              goto terminated;
394           }
395         eina_strbuf_append_char(ls->buff, ']');
396         continue;
397       /* references and @since */
398       case '@':
399         if ((size_t)(ls->stream_end - ls->stream) >= (sizeof("since")) &&
400             !memcmp(ls->stream, "since ", sizeof("since")))
401           {
402              next_char(ls);
403              *since = EINA_TRUE;
404              for (size_t i = 0; i < sizeof("since"); ++i)
405                next_char(ls);
406              skip_ws(ls);
407              tokret = DOC_TEXT;
408              goto exit_with_token;
409           }
410         doc_ref(ls, doc);
411         eina_strbuf_append_char(ls->buff, '@');
412         next_char(ls);
413         /* in-class references */
414         if (ls->klass && ls->current == '.')
415           {
416              next_char(ls);
417              if (isalpha(ls->current) || ls->current == '_')
418                eina_strbuf_append(ls->buff, ls->klass->base.name);
419              eina_strbuf_append_char(ls->buff, '.');
420           }
421         continue;
422       /* default case - append character */
423       default:
424         eina_strbuf_append_char(ls->buff, ls->current);
425         next_char(ls);
426         continue;
427      }
428 terminated:
429    next_char(ls);
430    *term = EINA_TRUE;
431 exit_with_token:
432    eina_strbuf_trim(ls->buff);
433    return tokret;
434 }
435 
436 static int
read_since(Eo_Lexer * ls)437 read_since(Eo_Lexer *ls)
438 {
439    eina_strbuf_reset(ls->buff);
440    while (ls->current && (ls->current == '.' ||
441                           ls->current == '_' ||
442                           isalnum(ls->current)))
443      {
444         eina_strbuf_append_char(ls->buff, ls->current);
445         next_char(ls);
446      }
447    if (!eina_strbuf_length_get(ls->buff))
448      return DOC_UNFINISHED;
449    skip_ws(ls);
450    while (is_newline(ls->current))
451      next_line_ws(ls);
452    if (ls->current != ']')
453      return DOC_MANGLED;
454    next_char(ls);
455    if (ls->current != ']')
456      return DOC_MANGLED;
457    next_char(ls);
458    return DOC_SINCE;
459 }
460 
/*
 * Abort documentation lexing: dispose of the scratch buffer `buf` and of
 * the partially built `doc` (summary, description, reference list and the
 * object itself), then raise a lexer error carrying `msg`.
 */
void doc_error(Eo_Lexer *ls, const char *msg, Eolian_Documentation *doc, Eina_Strbuf *buf)
{
   /* scratch buffer first, it does not depend on the doc object */
   eina_strbuf_free(buf);

   eina_list_free(doc->ref_dbg);
   eina_stringshare_del(doc->description);
   eina_stringshare_del(doc->summary);
   free(doc);

   eo_lexer_lex_error(ls, msg, -1);
}
470 
471 static void
read_doc(Eo_Lexer * ls,Eo_Token * tok,int line,int column)472 read_doc(Eo_Lexer *ls, Eo_Token *tok, int line, int column)
473 {
474    Eolian_Documentation *doc = calloc(1, sizeof(Eolian_Documentation));
475    if (!doc)
476      longjmp(ls->err_jmp, EO_LEXER_ERROR_OOM);
477 
478    doc->base.unit = ls->unit;
479    doc->base.file = ls->filename;
480    doc->base.line = line;
481    doc->base.column = column;
482    doc->base.type = EOLIAN_OBJECT_DOCUMENTATION;
483 
484    Eina_Strbuf *rbuf = eina_strbuf_new();
485 
486    Eina_Bool term = EINA_FALSE, since = EINA_FALSE;
487    while (!term)
488      {
489         int read;
490         if (since)
491           {
492              read = read_since(ls);
493              term = EINA_TRUE;
494           }
495         else
496           read = doc_lex(ls, doc, &term, &since);
497         switch (read)
498           {
499            case DOC_MANGLED:
500              doc_error(ls, "mangled documentation", doc, rbuf);
501              return;
502            case DOC_UNFINISHED:
503              doc_error(ls, "unfinished documentation", doc, rbuf);
504              return;
505            case DOC_TEXT:
506              if (!eina_strbuf_length_get(ls->buff))
507                continue;
508              if (!doc->summary)
509                doc->summary = eina_stringshare_add(eina_strbuf_string_get(ls->buff));
510              else
511                {
512                   if (eina_strbuf_length_get(rbuf))
513                     eina_strbuf_append(rbuf, "\n\n");
514                   eina_strbuf_append(rbuf, eina_strbuf_string_get(ls->buff));
515                }
516              break;
517            case DOC_SINCE:
518              doc->since = eina_stringshare_add(eina_strbuf_string_get(ls->buff));
519              break;
520           }
521      }
522 
523    if (eina_strbuf_length_get(rbuf))
524      doc->description = eina_stringshare_add(eina_strbuf_string_get(rbuf));
525    if (!doc->since && ls->klass && ls->klass->doc)
526      doc->since = eina_stringshare_ref(ls->klass->doc->since);
527    eina_strbuf_free(rbuf);
528    tok->value.doc = doc;
529 }
530 
531 static void
esc_error(Eo_Lexer * ls,int * c,int n,const char * msg)532 esc_error(Eo_Lexer *ls, int *c, int n, const char *msg)
533 {
534    int i;
535    eina_strbuf_reset(ls->buff);
536    eina_strbuf_append_char(ls->buff, '\\');
537    for (i = 0; i < n && c[i]; ++i)
538      eina_strbuf_append_char(ls->buff, c[i]);
539    eo_lexer_lex_error(ls, msg, TOK_STRING);
540 }
541 
/*
 * Numeric value of a hexadecimal digit character.
 *
 * The caller is expected to have validated `c` already (e.g. with
 * isxdigit()); other inputs yield a meaningless number.
 */
static int
hex_val(int c)
{
   int base = '0';
   int bias = 0;

   if (c >= 'a')
     {
        base = 'a';
        bias = 10;
     }
   else if (c >= 'A')
     {
        base = 'A';
        bias = 10;
     }
   return c - base + bias;
}
549 
550 static int
read_hex_esc(Eo_Lexer * ls)551 read_hex_esc(Eo_Lexer *ls)
552 {
553    int c[3] = { 'x' };
554    int i, r = 0;
555    for (i = 1; i < 3; ++i)
556      {
557         next_char(ls);
558         c[i] = ls->current;
559         if (!isxdigit(c[i]))
560           esc_error(ls, c, i + 1, "hexadecimal digit expected");
561         r = (r << 4) + hex_val(c[i]);
562      }
563    return r;
564 }
565 
566 static int
read_dec_esc(Eo_Lexer * ls)567 read_dec_esc(Eo_Lexer *ls)
568 {
569    int c[3];
570    int i, r = 0;
571    for (i = 0; i < 3 && isdigit(ls->current); ++i)
572      {
573         c[i] = ls->current;
574         r = r * 10 + (c[i] - '0');
575         next_char(ls);
576      }
577    if (r > UCHAR_MAX)
578      esc_error(ls, c, i, "decimal escape too large");
579    return r;
580 }
581 
582 static void
read_escape(Eo_Lexer * ls)583 read_escape(Eo_Lexer *ls)
584 {
585    switch (ls->current)
586      {
587       case 'a': eina_strbuf_append_char(ls->buff, '\a'); next_char(ls); break;
588       case 'b': eina_strbuf_append_char(ls->buff, '\b'); next_char(ls); break;
589       case 'f': eina_strbuf_append_char(ls->buff, '\f'); next_char(ls); break;
590       case 'n': eina_strbuf_append_char(ls->buff, '\n'); next_char(ls); break;
591       case 'r': eina_strbuf_append_char(ls->buff, '\r'); next_char(ls); break;
592       case 't': eina_strbuf_append_char(ls->buff, '\t'); next_char(ls); break;
593       case 'v': eina_strbuf_append_char(ls->buff, '\v'); next_char(ls); break;
594       case 'x':
595         eina_strbuf_append_char(ls->buff, read_hex_esc(ls));
596         next_char(ls);
597         break;
598       case '\n': case '\r':
599         next_line(ls);
600         eina_strbuf_append_char(ls->buff, '\n');
601         break;
602       case '\\': case '"': case '\'':
603         eina_strbuf_append_char(ls->buff, ls->current);
604         break;
605       case '\0':
606         break;
607       default:
608         if (!isdigit(ls->current))
609           esc_error(ls, &ls->current, 1, "invalid escape sequence");
610         eina_strbuf_append_char(ls->buff, read_dec_esc(ls));
611         break;
612      }
613 }
614 
615 static void
read_string(Eo_Lexer * ls,Eo_Token * tok)616 read_string(Eo_Lexer *ls, Eo_Token *tok)
617 {
618    eina_strbuf_reset(ls->buff);
619    eina_strbuf_append_char(ls->buff, '"');
620    next_char(ls);
621    while (ls->current != '"') switch (ls->current)
622      {
623       case '\0':
624         eo_lexer_lex_error(ls, "unfinished string", -1);
625         break;
626       case '\n': case '\r':
627         eo_lexer_lex_error(ls, "unfinished string", TOK_STRING);
628         break;
629       case '\\':
630         {
631            next_char(ls);
632            read_escape(ls);
633            break;
634         }
635       default:
636         eina_strbuf_append_char(ls->buff, ls->current);
637         next_char(ls);
638      }
639    eina_strbuf_append_char(ls->buff, ls->current);
640    next_char(ls);
641    tok->value.s = eina_stringshare_add_length(eina_strbuf_string_get(ls->buff) + 1,
642                                 (unsigned int)eina_strbuf_length_get(ls->buff) - 2);
643 }
644 
645 static int
get_type(Eo_Lexer * ls,Eina_Bool is_float)646 get_type(Eo_Lexer *ls, Eina_Bool is_float)
647 {
648    if (is_float)
649      {
650         if (ls->current == 'f' || ls->current == 'F')
651           {
652              next_char(ls);
653              return NUM_FLOAT;
654           }
655         return NUM_DOUBLE;
656      }
657    if (ls->current == 'u' || ls->current == 'U')
658      {
659         next_char(ls);
660         if (ls->current == 'l' || ls->current == 'L')
661           {
662              next_char(ls);
663              if (ls->current == 'l' || ls->current == 'L')
664                {
665                   next_char(ls);
666                   return NUM_ULLONG;
667                }
668              return NUM_ULONG;
669           }
670         return NUM_UINT;
671      }
672    if (ls->current == 'l' || ls->current == 'L')
673      {
674         next_char(ls);
675         if (ls->current == 'l' || ls->current == 'L')
676           {
677              next_char(ls);
678              return NUM_LLONG;
679           }
680         return NUM_LONG;
681      }
682    return NUM_INT;
683 }
684 
685 static void
replace_decpoint(Eo_Lexer * ls,char prevdecp)686 replace_decpoint(Eo_Lexer *ls, char prevdecp)
687 {
688    if (ls->decpoint == prevdecp) return;
689    char *bufs = eina_strbuf_string_steal(ls->buff);
690    char *p = bufs;
691    while ((p = strchr(p, prevdecp))) *p = ls->decpoint;
692    eina_strbuf_append(ls->buff, bufs);
693    free(bufs);
694 }
695 
696 static void
write_val_with_decpoint(Eo_Lexer * ls,Eo_Token * tok,int type)697 write_val_with_decpoint(Eo_Lexer *ls, Eo_Token *tok, int type)
698 {
699    struct lconv *lc = localeconv();
700    char prev = ls->decpoint;
701    ls->decpoint = lc ? lc->decimal_point[0] : '.';
702    if (ls->decpoint == prev)
703      {
704         eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
705         return;
706      }
707    replace_decpoint(ls, prev);
708    char *end = NULL;
709    if (type == NUM_FLOAT)
710      tok->value.f = strtof(eina_strbuf_string_get(ls->buff), &end);
711    else if (type == NUM_DOUBLE)
712      tok->value.d = strtod(eina_strbuf_string_get(ls->buff), &end);
713    if (end && end[0])
714      eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
715    tok->kw = type;
716 }
717 
718 static void
write_val(Eo_Lexer * ls,Eo_Token * tok,Eina_Bool is_float)719 write_val(Eo_Lexer *ls, Eo_Token *tok, Eina_Bool is_float)
720 {
721    int type = get_type(ls, is_float);
722    char *end = NULL;
723    if (is_float)
724      {
725         replace_decpoint(ls, '.');
726         if (type == NUM_FLOAT)
727           tok->value.f = strtof(eina_strbuf_string_get(ls->buff), &end);
728         else if (type == NUM_DOUBLE)
729           tok->value.d = strtod(eina_strbuf_string_get(ls->buff), &end);
730      }
731    else
732      {
733         const char *str = eina_strbuf_string_get(ls->buff);
734         /* signed is always in the same memory location */
735         if (type == NUM_INT || type == NUM_UINT)
736           tok->value.u = strtoul(str, &end, 0);
737         else if (type == NUM_LONG || type == NUM_ULONG)
738           tok->value.ul = strtoul(str, &end, 0);
739         else if (type == NUM_LLONG || type == NUM_ULLONG)
740           tok->value.ull = strtoull(str, &end, 0);
741      }
742    if (end && end[0])
743      {
744         if (is_float)
745           {
746              write_val_with_decpoint(ls, tok, type);
747              return;
748           }
749         eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
750      }
751    tok->kw = type;
752 }
753 
754 static void
write_exp(Eo_Lexer * ls)755 write_exp(Eo_Lexer *ls)
756 {
757    eina_strbuf_append_char(ls->buff, ls->current);
758    next_char(ls);
759    if (ls->current == '+' || ls->current == '-')
760      {
761         eina_strbuf_append_char(ls->buff, ls->current);
762         next_char(ls);
763         while (isdigit(ls->current))
764           {
765              eina_strbuf_append_char(ls->buff, ls->current);
766              next_char(ls);
767           }
768      }
769 }
770 
771 static void
read_hex_number(Eo_Lexer * ls,Eo_Token * tok)772 read_hex_number(Eo_Lexer *ls, Eo_Token *tok)
773 {
774    Eina_Bool is_float = EINA_FALSE;
775    while (isxdigit(ls->current) || ls->current == '.')
776      {
777         eina_strbuf_append_char(ls->buff, ls->current);
778         if (ls->current == '.') is_float = EINA_TRUE;
779         next_char(ls);
780      }
781    if (is_float && (ls->current != 'p' && ls->current != 'P'))
782      {
783         eo_lexer_lex_error(ls, "hex float literals require an exponent",
784                            TOK_NUMBER);
785      }
786    if (ls->current == 'p' || ls->current == 'P')
787      {
788         is_float = EINA_TRUE;
789          write_exp(ls);
790      }
791    write_val(ls, tok, is_float);
792 }
793 
794 static void
read_number(Eo_Lexer * ls,Eo_Token * tok)795 read_number(Eo_Lexer *ls, Eo_Token *tok)
796 {
797    Eina_Bool is_float = eina_strbuf_string_get(ls->buff)[0] == '.';
798    if (ls->current == '0' && !is_float)
799      {
800         eina_strbuf_append_char(ls->buff, ls->current);
801         next_char(ls);
802         if (ls->current == 'x' || ls->current == 'X')
803           {
804              eina_strbuf_append_char(ls->buff, ls->current);
805              next_char(ls);
806              read_hex_number(ls, tok);
807              return;
808           }
809      }
810    while (isdigit(ls->current) || ls->current == '.')
811      {
812         eina_strbuf_append_char(ls->buff, ls->current);
813         if (ls->current == '.') is_float = EINA_TRUE;
814         next_char(ls);
815      }
816    if (ls->current == 'e' || ls->current == 'E')
817      {
818         is_float = EINA_TRUE;
819          write_exp(ls);
820      }
821    write_val(ls, tok, is_float);
822 }
823 
824 static int
lex(Eo_Lexer * ls,Eo_Token * tok)825 lex(Eo_Lexer *ls, Eo_Token *tok)
826 {
827    eina_strbuf_reset(ls->buff);
828    tok->value.s = NULL;
829    for (;;) switch (ls->current)
830      {
831       case '\n':
832       case '\r':
833         next_line(ls);
834         continue;
835       case '/':
836         {
837            next_char(ls);
838            if (ls->current == '*')
839              {
840                 int ccol = ls->column;
841                 next_char(ls);
842                 if (ls->current == '@')
843                   {
844                      eo_lexer_lex_error(ls, "old style documentation comment", -1);
845                      return -1; /* unreachable */
846                   }
847                 read_long_comment(ls, ccol);
848                 continue;
849              }
850            else if (ls->current != '/') return '/';
851            next_char(ls);
852            while (ls->current && !is_newline(ls->current))
853              next_char(ls);
854            continue;
855         }
856       case '[':
857         {
858            int dline = ls->line_number, dcol = ls->column;
859            const char *sline = ls->stream_line;
860            next_char(ls);
861            if (ls->current != '[') return '[';
862            next_char(ls);
863            read_doc(ls, tok, dline, dcol);
864            ls->column = dcol + 1;
865            /* doc is the only potentially multiline token */
866            ls->line_number = dline;
867            ls->stream_line = sline;
868            return TOK_DOC;
869         }
870       case '\0':
871         return -1;
872       case '=':
873         next_char(ls);
874         if (!ls->expr_mode || (ls->current != '=')) return '=';
875         next_char(ls);
876         --ls->column;
877         return TOK_EQ;
878       case '!':
879         next_char(ls);
880         if (!ls->expr_mode || (ls->current != '=')) return '!';
881         next_char(ls);
882         --ls->column;
883         return TOK_NQ;
884       case '>':
885         next_char(ls);
886         if (!ls->expr_mode) return '>';
887         if (ls->current == '=')
888           {
889              next_char(ls);
890              --ls->column;
891              return TOK_GE;
892           }
893         else if (ls->current == '>')
894           {
895              next_char(ls);
896              --ls->column;
897              return TOK_RSH;
898           }
899         return '>';
900       case '<':
901         next_char(ls);
902         if (!ls->expr_mode) return '<';
903         if (ls->current == '=')
904           {
905              next_char(ls);
906              --ls->column;
907              return TOK_LE;
908           }
909         else if (ls->current == '<')
910           {
911              next_char(ls);
912              --ls->column;
913              return TOK_LSH;
914           }
915         return '<';
916       case '&':
917         next_char(ls);
918         if (!ls->expr_mode || (ls->current != '&')) return '&';
919         next_char(ls);
920         --ls->column;
921         return TOK_AND;
922       case '|':
923         next_char(ls);
924         if (!ls->expr_mode || (ls->current != '|')) return '|';
925         next_char(ls);
926         --ls->column;
927         return TOK_OR;
928       case '"':
929         {
930            int dcol = ls->column;
931            if (!ls->expr_mode)
932              {
933                 next_char(ls);
934                 return '"';
935              }
936            /* strings are not multiline for now at least */
937            read_string(ls, tok);
938            ls->column = dcol + 1;
939            return TOK_STRING;
940         }
941       case '\'':
942         {
943            int dcol = ls->column;
944            next_char(ls);
945            if (!ls->expr_mode) return '\'';
946            if (ls->current == '\\')
947              {
948                 next_char(ls);
949                 eina_strbuf_reset(ls->buff);
950                 read_escape(ls);
951                 tok->value.c = (char)*eina_strbuf_string_get(ls->buff);
952              }
953            else
954              {
955                 tok->value.c = ls->current;
956                 next_char(ls);
957              }
958            if (ls->current != '\'')
959              eo_lexer_lex_error(ls, "unfinished character", TOK_CHAR);
960            next_char(ls);
961            ls->column = dcol + 1;
962            return TOK_CHAR;
963         }
964       case '.':
965         {
966            int dcol = ls->column;
967            next_char(ls);
968            if (!isdigit(ls->current)) return '.';
969            eina_strbuf_reset(ls->buff);
970            eina_strbuf_append_char(ls->buff, '.');
971            read_number(ls, tok);
972            ls->column = dcol + 1;
973            return TOK_NUMBER;
974         }
975       default:
976         {
977            if (isspace(ls->current))
978              {
979                 assert(!is_newline(ls->current));
980                 next_char(ls);
981                 continue;
982              }
983            else if (isdigit(ls->current))
984              {
985                 int col = ls->column;
986                 eina_strbuf_reset(ls->buff);
987                 read_number(ls, tok);
988                 ls->column = col + 1;
989                 return TOK_NUMBER;
990              }
991            if (ls->current && (isalnum(ls->current)
992                || ls->current == '@' || ls->current == '#' || ls->current == '_'))
993              {
994                 int col = ls->column;
995                 Eina_Bool pfx_kw = (ls->current == '@') || (ls->current == '#');
996                 const char *str;
997                 eina_strbuf_reset(ls->buff);
998                 do
999                   {
1000                      eina_strbuf_append_char(ls->buff, ls->current);
1001                      next_char(ls);
1002                   }
1003                 while (ls->current && (isalnum(ls->current)
1004                        || ls->current == '_'));
1005                 str     = eina_strbuf_string_get(ls->buff);
1006                 tok->kw = (int)(uintptr_t)eina_hash_find(keyword_map,
1007                                                         str);
1008                 ls->column = col + 1;
1009                 tok->value.s = eina_stringshare_add(str);
1010                 if (pfx_kw && tok->kw == 0)
1011                   eo_lexer_syntax_error(ls, "invalid keyword");
1012                 return TOK_VALUE;
1013              }
1014            else
1015              {
1016                 int c = ls->current;
1017                 next_char(ls);
1018                 return c;
1019              }
1020         }
1021      }
1022 }
1023 
1024 static const char *
get_filename(Eo_Lexer * ls)1025 get_filename(Eo_Lexer *ls)
1026 {
1027    const char *fslash = strrchr(ls->source, '/');
1028    const char *bslash = strrchr(ls->source, '\\');
1029    if (fslash || bslash)
1030      return eina_stringshare_add((fslash > bslash) ? (fslash + 1) : (bslash + 1));
1031    return eina_stringshare_ref(ls->source);
1032 }
1033 
1034 static void
_node_free(Eolian_Object * obj)1035 _node_free(Eolian_Object *obj)
1036 {
1037 #if 0
1038    /* for when we have a proper node allocator and collect on shutdown */
1039    if (obj->refcount > 1)
1040      {
1041         eolian_state_log(obj->state, "node %p (type %d, name %s at %s:%d:%d)"
1042                          " dangling ref (count: %d)", obj, obj->type, obj->name,
1043                          obj->file, obj->line, obj->column, obj->refcount);
1044      }
1045 #endif
1046    switch (obj->type)
1047      {
1048       case EOLIAN_OBJECT_CLASS:
1049         database_class_del((Eolian_Class *)obj);
1050         break;
1051       case EOLIAN_OBJECT_TYPEDECL:
1052         database_typedecl_del((Eolian_Typedecl *)obj);
1053         break;
1054       case EOLIAN_OBJECT_TYPE:
1055         database_type_del((Eolian_Type *)obj);
1056         break;
1057       case EOLIAN_OBJECT_CONSTANT:
1058         database_constant_del((Eolian_Constant *)obj);
1059         break;
1060       case EOLIAN_OBJECT_EXPRESSION:
1061         database_expr_del((Eolian_Expression *)obj);
1062         break;
1063       default:
1064         /* normally unreachable, just for debug */
1065         assert(0);
1066         break;
1067      }
1068 }
1069 
1070 static void
eo_lexer_set_input(Eo_Lexer * ls,Eolian_State * state,const char * source)1071 eo_lexer_set_input(Eo_Lexer *ls, Eolian_State *state, const char *source)
1072 {
1073    Eina_File *f = eina_file_open(source, EINA_FALSE);
1074    if (!f)
1075      {
1076         eolian_state_log(state, "%s", strerror(errno));
1077         longjmp(ls->err_jmp, EO_LEXER_ERROR_NORMAL);
1078      }
1079    ls->lookahead.token = -1;
1080    ls->state           = state;
1081    ls->buff            = eina_strbuf_new();
1082    ls->handle          = f;
1083    ls->stream          = eina_file_map_all(f, EINA_FILE_RANDOM);
1084    ls->stream_end      = ls->stream + eina_file_size_get(f);
1085    ls->stream_line     = ls->stream;
1086    ls->source          = eina_stringshare_add(source);
1087    ls->filename        = get_filename(ls);
1088    ls->iline_number    = ls->line_number = 1;
1089    ls->icolumn         = ls->column = -1;
1090    ls->decpoint        = '.';
1091    ls->nodes           = eina_hash_pointer_new(EINA_FREE_CB(_node_free));
1092    next_char(ls);
1093 
1094    Eolian_Unit *ncunit = calloc(1, sizeof(Eolian_Unit));
1095    if (!ncunit)
1096      {
1097         eo_lexer_free(ls);
1098         eolian_state_panic(state, "out of memory");
1099      }
1100    ls->unit = ncunit;
1101    database_unit_init(state, ncunit, ls->filename);
1102    eina_hash_add(state->staging.units, ls->filename, ncunit);
1103 
1104    if (ls->current != 0xEF)
1105      return;
1106    next_char(ls);
1107    if (ls->current != 0xBB)
1108      return;
1109    next_char(ls);
1110    if (ls->current != 0xBF)
1111      return;
1112    next_char(ls);
1113 }
1114 
1115 Eolian_Object *
eo_lexer_node_new(Eo_Lexer * ls,size_t objsize)1116 eo_lexer_node_new(Eo_Lexer *ls, size_t objsize)
1117 {
1118    Eolian_Object *obj = calloc(1, objsize);
1119    if (!obj)
1120      longjmp(ls->err_jmp, EO_LEXER_ERROR_OOM);
1121    eina_hash_add(ls->nodes, &obj, obj);
1122    eolian_object_ref(obj);
1123    return obj;
1124 }
1125 
1126 Eolian_Object *
eo_lexer_node_release(Eo_Lexer * ls,Eolian_Object * obj)1127 eo_lexer_node_release(Eo_Lexer *ls, Eolian_Object *obj)
1128 {
1129    /* just for debug */
1130    assert(eina_hash_find(ls->nodes, &obj) && (obj->refcount >= 1));
1131    (void)eolian_object_unref(obj);
1132    eina_hash_set(ls->nodes, &obj, NULL);
1133    return obj;
1134 }
1135 
1136 static void
_free_tok(Eo_Token * tok)1137 _free_tok(Eo_Token *tok)
1138 {
1139    if (tok->token < START_CUSTOM || tok->token == TOK_NUMBER ||
1140                                     tok->token == TOK_CHAR)
1141      return;
1142    if (tok->token == TOK_DOC)
1143      {
1144         /* free doc */
1145         if (!tok->value.doc) return;
1146         eina_stringshare_del(tok->value.doc->summary);
1147         eina_stringshare_del(tok->value.doc->description);
1148         free(tok->value.doc);
1149         tok->value.doc = NULL;
1150         return;
1151      }
1152    eina_stringshare_del(tok->value.s);
1153    tok->value.s = NULL;
1154 }
1155 
1156 void
eo_lexer_dtor_push(Eo_Lexer * ls,Eina_Free_Cb free_cb,void * data)1157 eo_lexer_dtor_push(Eo_Lexer *ls, Eina_Free_Cb free_cb, void *data)
1158 {
1159    Eo_Lexer_Dtor *dt = malloc(sizeof(Eo_Lexer_Dtor));
1160    if (!dt)
1161      {
1162         free_cb(data);
1163         longjmp(ls->err_jmp, EO_LEXER_ERROR_OOM);
1164      }
1165    dt->free_cb = free_cb;
1166    dt->data = data;
1167    ls->dtors = eina_list_prepend(ls->dtors, dt);
1168 }
1169 
1170 void
eo_lexer_dtor_pop(Eo_Lexer * ls)1171 eo_lexer_dtor_pop(Eo_Lexer *ls)
1172 {
1173    Eo_Lexer_Dtor *dt = eina_list_data_get(ls->dtors);
1174    ls->dtors = eina_list_remove_list(ls->dtors, ls->dtors);
1175    dt->free_cb(dt->data);
1176    free(dt);
1177 }
1178 
1179 void
eo_lexer_free(Eo_Lexer * ls)1180 eo_lexer_free(Eo_Lexer *ls)
1181 {
1182    if (!ls) return;
1183    if (ls->source  ) eina_stringshare_del(ls->source);
1184    if (ls->filename) eina_stringshare_del(ls->filename);
1185    if (ls->buff    ) eina_strbuf_free    (ls->buff);
1186    if (ls->handle  ) eina_file_close     (ls->handle);
1187 
1188    _free_tok(&ls->t);
1189    eo_lexer_context_clear(ls);
1190 
1191    Eo_Lexer_Dtor *dtor;
1192    EINA_LIST_FREE(ls->dtors, dtor)
1193      dtor->free_cb(dtor->data);
1194 
1195    eina_hash_free(ls->nodes);
1196 
1197    free(ls);
1198 }
1199 
1200 Eo_Lexer *
eo_lexer_new(Eolian_State * state,const char * source)1201 eo_lexer_new(Eolian_State *state, const char *source)
1202 {
1203    volatile Eo_Lexer *ls = calloc(1, sizeof(Eo_Lexer));
1204    if (!ls)
1205      eolian_state_panic(state, "out of memory");
1206 
1207    if (!setjmp(((Eo_Lexer *)(ls))->err_jmp))
1208      {
1209         eo_lexer_set_input((Eo_Lexer *) ls, state, source);
1210         return (Eo_Lexer *) ls;
1211      }
1212    eo_lexer_free((Eo_Lexer *) ls);
1213    return NULL;
1214 }
1215 
1216 int
eo_lexer_get(Eo_Lexer * ls)1217 eo_lexer_get(Eo_Lexer *ls)
1218 {
1219    _free_tok(&ls->t);
1220    if (ls->lookahead.token >= 0)
1221      {
1222         ls->t               = ls->lookahead;
1223         ls->lookahead.token = -1;
1224         return ls->t.token;
1225      }
1226    ls->t.kw = 0;
1227    return (ls->t.token = lex(ls, &ls->t));
1228 }
1229 
1230 int
eo_lexer_lookahead(Eo_Lexer * ls)1231 eo_lexer_lookahead(Eo_Lexer *ls)
1232 {
1233    assert (ls->lookahead.token < 0);
1234    ls->lookahead.kw = 0;
1235    eo_lexer_context_push(ls);
1236    ls->lookahead.token = lex(ls, &ls->lookahead);
1237    eo_lexer_context_restore(ls);
1238    eo_lexer_context_pop(ls);
1239    return ls->lookahead.token;
1240 }
1241 
1242 void
eo_lexer_lex_error(Eo_Lexer * ls,const char * msg,int token)1243 eo_lexer_lex_error(Eo_Lexer *ls, const char *msg, int token)
1244 {
1245    if (token)
1246      {
1247         char buf[256];
1248         txt_token(ls, token, buf);
1249         throw(ls, "%s near '%s'", msg, buf);
1250      }
1251    else
1252      throw(ls, "%s", msg);
1253 }
1254 
1255 void
eo_lexer_syntax_error(Eo_Lexer * ls,const char * msg)1256 eo_lexer_syntax_error(Eo_Lexer *ls, const char *msg)
1257 {
1258    eo_lexer_lex_error(ls, msg, ls->t.token);
1259 }
1260 
1261 void
eo_lexer_token_to_str(int token,char * buf)1262 eo_lexer_token_to_str(int token, char *buf)
1263 {
1264    if (token < 0)
1265      {
1266         memcpy(buf, "<eof>", 6);
1267      }
1268    else if (token < START_CUSTOM)
1269      {
1270         assert((unsigned char)token == token);
1271         if (iscntrl(token))
1272           sprintf(buf, "char(%d)", token);
1273         else
1274           sprintf(buf, "%c", token);
1275      }
1276    else
1277      {
1278         const char *v;
1279         size_t idx = token - START_CUSTOM;
1280         size_t tsz = sizeof(tokens) / sizeof(tokens[0]);
1281         if (idx >= tsz)
1282           v = keywords[idx - tsz];
1283         else
1284           v = tokens[idx];
1285         memcpy(buf, v, strlen(v) + 1);
1286      }
1287 }
1288 
1289 const char *
eo_lexer_keyword_str_get(int kw)1290 eo_lexer_keyword_str_get(int kw)
1291 {
1292    return keywords[kw - 1];
1293 }
1294 
1295 Eina_Bool
eo_lexer_is_type_keyword(int kw)1296 eo_lexer_is_type_keyword(int kw)
1297 {
1298    return (kw >= KW_byte && kw < KW_true);
1299 }
1300 
1301 int
eo_lexer_keyword_str_to_id(const char * kw)1302 eo_lexer_keyword_str_to_id(const char *kw)
1303 {
1304    return (int)(uintptr_t)eina_hash_find(keyword_map, kw);
1305 }
1306 
1307 const char *
eo_lexer_get_c_type(int kw)1308 eo_lexer_get_c_type(int kw)
1309 {
1310    if (!eo_lexer_is_type_keyword(kw)) return NULL;
1311    return ctypes[kw - KW_byte];
1312 }
1313 
1314 static Eina_Bool
_eo_is_tokstr(int t)1315 _eo_is_tokstr(int t) {
1316     return (t == TOK_STRING) || (t == TOK_VALUE);
1317 }
1318 
1319 void
eo_lexer_context_push(Eo_Lexer * ls)1320 eo_lexer_context_push(Eo_Lexer *ls)
1321 {
1322    Lexer_Ctx *ctx = malloc(sizeof(Lexer_Ctx));
1323    if (!ctx)
1324      longjmp(ls->err_jmp, EO_LEXER_ERROR_OOM);
1325    ctx->line = ls->line_number;
1326    ctx->column = ls->column;
1327    ctx->linestr = ls->stream_line;
1328    ctx->token = ls->t;
1329    if (_eo_is_tokstr(ctx->token.token))
1330      eina_stringshare_ref(ctx->token.value.s);
1331    ls->saved_ctxs = eina_list_prepend(ls->saved_ctxs, ctx);
1332 }
1333 
1334 void
eo_lexer_context_pop(Eo_Lexer * ls)1335 eo_lexer_context_pop(Eo_Lexer *ls)
1336 {
1337    Lexer_Ctx *ctx = (Lexer_Ctx*)eina_list_data_get(ls->saved_ctxs);
1338    if (_eo_is_tokstr(ctx->token.token))
1339      eina_stringshare_del(ctx->token.value.s);
1340    free(ctx);
1341    ls->saved_ctxs = eina_list_remove_list(ls->saved_ctxs, ls->saved_ctxs);
1342 }
1343 
1344 void
eo_lexer_context_restore(Eo_Lexer * ls)1345 eo_lexer_context_restore(Eo_Lexer *ls)
1346 {
1347    if (!eina_list_count(ls->saved_ctxs)) return;
1348    Lexer_Ctx *ctx = (Lexer_Ctx*)eina_list_data_get(ls->saved_ctxs);
1349    ls->line_number = ctx->line;
1350    ls->column      = ctx->column;
1351    ls->stream_line = ctx->linestr;
1352    if (_eo_is_tokstr(ls->t.token))
1353      eina_stringshare_del(ls->t.value.s);
1354    ls->t = ctx->token;
1355    if (_eo_is_tokstr(ls->t.token))
1356      eina_stringshare_ref(ls->t.value.s);
1357 }
1358 
1359 void
eo_lexer_context_clear(Eo_Lexer * ls)1360 eo_lexer_context_clear(Eo_Lexer *ls)
1361 {
1362    Lexer_Ctx *ctx;
1363    EINA_LIST_FREE(ls->saved_ctxs, ctx) free(ctx);
1364 }
1365