1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB and Kjell Winblad 2019. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 /*
22  * Author: Kjell Winblad
23  */
24 
25 
26 #include "lib/tiny_regex_c/re.h"
27 #include "ycf_yield_fun.h"
28 #include "ycf_utils.h"
29 
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33 
34 
35 
/*
 * Compares the text covered by a symbol with a NUL-terminated string.
 *
 * The symbol's text is the range of symbol->source that begins at
 * index symbol->start and is (symbol->stop - symbol->start) characters
 * long.
 *
 * Returns 1 when that text has the same length and content as str,
 * otherwise 0.
 */
int ycf_symbol_is_text_eq(ycf_symbol* symbol, char* str){
  unsigned long text_len = symbol->stop - symbol->start;
  /* Different lengths can never be equal; skip the comparison. */
  if (text_len != strlen(str)) {
    return 0;
  }
  return strncmp(str, &symbol->source[symbol->start], text_len) == 0;
}
42 
/*
 * Returns a freshly allocated (ycf_malloc), NUL-terminated copy of the
 * text in s1->source that starts at s1->start and ends just before
 * s2->stop.
 *
 * NOTE(review): only s1->source is read, so this presumably expects
 * s1 and s2 to refer to the same source buffer -- confirm with callers.
 */
char* ycf_symbol_text_between(ycf_symbol* s1, ycf_symbol* s2){
  int length = s2->stop - s1->start;
  char* copy = ycf_malloc(length + 1);
  strncpy(copy, s1->source + s1->start, length);
  /* strncpy does not terminate the copy when it fills all length bytes. */
  copy[length] = 0;
  return copy;
}
50 
/*
 * Maps a symbol type to the spelling of its enumerator, for use in
 * diagnostics and debug output.
 *
 * Returns a pointer to a string literal (the caller must not modify or
 * free it). A value that matches none of the listed enumerators yields
 * "non_existing_symbol?".
 */
char* get_symbol_type_text(ycf_symbol_type type){
  char* name = "non_existing_symbol?";
  switch(type) {
  case ycf_symbol_type_comment:
    name = "ycf_symbol_type_comment"; break;
  case ycf_symbol_type_string_literal:
    name = "ycf_symbol_type_string_literal"; break;
  case ycf_symbol_type_macro_define:
    name = "ycf_symbol_type_macro_define"; break;
  case ycf_symbol_type_macro_command:
    name = "ycf_symbol_type_macro_command"; break;
  case ycf_symbol_type_whitespace:
    name = "ycf_symbol_type_whitespace"; break;
  case ycf_symbol_type_identifier:
    name = "ycf_symbol_type_identifier"; break;
  case ycf_symbol_type_number:
    name = "ycf_symbol_type_number"; break;
  case ycf_symbol_type_star:
    name = "ycf_symbol_type_star"; break;
  case ycf_symbol_type_neg:
    name = "ycf_symbol_type_neg"; break;
  case ycf_symbol_type_equal_equal_sign:
    name = "ycf_symbol_type_equal_equal_sign"; break;
  case ycf_symbol_type_not_equal_sign:
    name = "ycf_symbol_type_not_equal_sign"; break;
  case ycf_symbol_type_open_parenthesis:
    name = "ycf_symbol_type_open_parenthesis"; break;
  case ycf_symbol_type_end_parenthesis:
    name = "ycf_symbol_type_end_parenthesis"; break;
  case ycf_symbol_type_open_curly_brace:
    name = "ycf_symbol_type_open_curly_brace"; break;
  case ycf_symbol_type_end_curly_brace:
    name = "ycf_symbol_type_end_curly_brace"; break;
  case ycf_symbol_type_open_square_bracket:
    name = "ycf_symbol_type_open_square_bracket"; break;
  case ycf_symbol_type_end_square_bracket:
    name = "ycf_symbol_type_end_square_bracket"; break;
  case ycf_symbol_type_equal_sign:
    name = "ycf_symbol_type_equal_sign"; break;
  case ycf_symbol_type_semicolon:
    name = "ycf_symbol_type_semicolon"; break;
  case ycf_symbol_type_comma:
    name = "ycf_symbol_type_comma"; break;
  case ycf_symbol_type_pointer_field_access:
    name = "ycf_symbol_type_pointer_field_access"; break;
  case ycf_symbol_type_period:
    name = "ycf_symbol_type_period"; break;
  case ycf_symbol_type_const:
    name = "ycf_symbol_type_const"; break;
  case ycf_symbol_type_void:
    name = "ycf_symbol_type_void"; break;
  case ycf_symbol_type_volatile:
    name = "ycf_symbol_type_volatile"; break;
  case ycf_symbol_type_static:
    name = "ycf_symbol_type_static"; break;
  case ycf_symbol_type_inline:
    name = "ycf_symbol_type_inline"; break;
  case ycf_symbol_type_return:
    name = "ycf_symbol_type_return"; break;
  case ycf_symbol_type_if:
    name = "ycf_symbol_type_if"; break;
  case ycf_symbol_type_else:
    name = "ycf_symbol_type_else"; break;
  case ycf_symbol_type_goto:
    name = "ycf_symbol_type_goto"; break;
  case ycf_symbol_type_break:
    name = "ycf_symbol_type_break"; break;
  case ycf_symbol_type_while:
    name = "ycf_symbol_type_while"; break;
  case ycf_symbol_type_do:
    name = "ycf_symbol_type_do"; break;
  case ycf_symbol_type_for:
    name = "ycf_symbol_type_for"; break;
  case ycf_symbol_type_switch:
    name = "ycf_symbol_type_switch"; break;
  case ycf_symbol_type_continue:
    name = "ycf_symbol_type_continue"; break;
  case ycf_symbol_type_something_else:
    name = "ycf_symbol_type_something_else"; break;
  case ycf_symbol_type_special_code_start:
    name = "ycf_symbol_type_special_code_start"; break;
  case ycf_symbol_type_special_code_end:
    name = "ycf_symbol_type_special_code_end"; break;
  }
  return name;
}
96 
97 typedef struct symbol_finder {
98   int (*finder)(struct symbol_finder*,char*);
99   ycf_symbol_type type;
100   int length;
101   char *str_1;
102   char *str_2;
103 } symbol_finder;
104 
/*
 * Returns 1 when str begins with prefix and 0 otherwise.
 * An empty prefix is a prefix of every string.
 */
int starts_with(char *str, char *prefix)
{
  /* Walk both strings in lock-step until the prefix is exhausted. */
  while (*prefix != '\0') {
    /* Also covers str ending early: its NUL differs from *prefix. */
    if (*str != *prefix) {
      return 0;
    }
    str++;
    prefix++;
  }
  return 1;
}
109 
/*
 * Counts how many consecutive positions, starting at text, satisfy
 * re_match(f->str_1, position) == 0.
 *
 * NOTE(review): this file consistently treats a re_match result of 0
 * as "the pattern matches right here"; presumably re_match returns the
 * index of the match -- confirm against the regex library.
 *
 * Returns the number of characters consumed (0 when the first
 * position already fails).
 */
int until_no_match(symbol_finder* f, char* text){
  int length;
  for (length = 0; re_match(f->str_1, text + length) == 0; length++) {
    /* keep extending while the pattern still matches at this offset */
  }
  return length;
}
117 
/*
 * Recognizes a double-quoted string literal at the start of text.
 *
 * The literal runs from the opening quote to the next quote that is
 * not escaped by a backslash. A backslash always consumes the
 * character after it, so both \" and \\ are handled: "a\"b" is one
 * literal and "a\\" ends at its final quote.
 *
 * f is unused; it is only present to match the finder signature.
 *
 * Returns the length of the literal including both quotes, or 0 when
 * text does not start with a double quote. If the input ends before
 * the closing quote, an error is printed and the process exits with
 * status 1.
 */
int string_litteral_finder(symbol_finder* f, char* text){
  int pos;
  (void)f;
  if (text[0] != '"'){
    return 0;
  }
  pos = 1;
  while(text[pos] != '"' && text[pos] != 0){
    /* Escape sequence: skip the backslash here and the escaped
       character below. The second test keeps a trailing backslash
       at the very end of the input from stepping over the NUL. */
    if(text[pos] == '\\' && text[pos + 1] != 0){
      pos++;
    }
    pos++;
  }
  if(text[pos] != '"'){
    printf("Broken string litteral\n");
    exit(1);
  }
  return pos + 1;
}
136 
/*
 * Recognizes a "#define" directive at the start of text.
 *
 * The directive extends to the first newline that is not preceded by
 * a backslash (a backslash-newline pair continues the directive on
 * the next line), or to the end of the input when the last line has
 * no trailing newline. The terminating newline is not included.
 *
 * f is unused; it is only present to match the finder signature.
 *
 * Returns the length of the directive, or 0 when text does not start
 * with "#define".
 */
int macro_define_finder(symbol_finder* f, char* text){
  int pos;
  (void)f;
  if (!starts_with(text, "#define")){
    return 0;
  }
  pos = strlen("#define");
  /* Stop at the NUL as well as at a newline so that a directive on
     the last line of the input cannot run past the buffer. */
  while(text[pos] != 0 && text[pos] != '\n'){
    if(text[pos] == '\\' && text[pos + 1] == '\n'){
      /* Line continuation: swallow the backslash and the newline. */
      pos = pos + 2;
    } else {
      pos++;
    }
  }
  return pos;
}
153 
154 
/*
 * Recognizes a symbol whose first character must satisfy the pattern
 * f->str_1 and whose extent is the run of positions that satisfy the
 * pattern f->str_2 (counted from the start of text).
 *
 * "Satisfy" here means re_match(pattern, position) == 0.
 *
 * Returns the number of characters consumed, or 0 when the start
 * pattern does not match.
 */
int starts_with_until_no_match(symbol_finder* f, char* text){
  int length = 0;
  if(re_match(f->str_1, text) != 0){
    return 0;
  }
  while(re_match(f->str_2, text + length) == 0){
    length++;
  }
  return length;
}
164 
/*
 * Recognizes a symbol delimited by two fixed strings: it must begin
 * with f->str_1 and it ends with the first occurrence of f->str_2
 * found after that opening string.
 *
 * The search for the end marker starts after the whole opening
 * string, so the two markers cannot overlap (with "/*" and "*" "/"
 * the three characters "/", "*", "/" are not a complete comment).
 *
 * Returns the length of the symbol including both markers, or 0 when
 * text does not start with f->str_1. When the input ends before the
 * end marker is found, the symbol extends to the end of the input and
 * that length is returned; the scan never reads past the NUL.
 */
int starts_with_ends_with(symbol_finder* f, char* text){
  int pos;
  if(!starts_with(text, f->str_1)){
    return 0;
  }
  pos = strlen(f->str_1);
  while(text[pos] != 0){
    if(starts_with(&(text[pos]), f->str_2)){
      return pos + strlen(f->str_2);
    }
    pos++;
  }
  /* End marker missing: treat the rest of the input as the symbol. */
  return pos;
}
175 
/*
 * Recognizes the literal string f->str_1 at the start of text.
 *
 * Returns strlen(f->str_1) on a match and 0 otherwise.
 */
int fixed_string(symbol_finder* f, char* text){
  return starts_with(text, f->str_1) ? strlen(f->str_1) : 0;
}
182 
/*
 * Recognizes the keyword f->str_1 at the start of text, provided the
 * check on what follows the keyword passes.
 *
 * The follow-up check calls re_match("[^\\W]", <text after keyword>)
 * and accepts any non-zero result. A result of 0 -- which the rest of
 * this file treats as "matches right here" -- rejects the keyword.
 * NOTE(review): the intent appears to be that a keyword immediately
 * followed by a word character is left for the identifier rule;
 * confirm against the regex library's return convention.
 *
 * Returns strlen(f->str_1) when accepted and 0 otherwise.
 */
int fixed_alpha_string(symbol_finder* f, char* text){
  size_t keyword_length;
  if(!starts_with(text, f->str_1)){
    return 0;
  }
  keyword_length = strlen(f->str_1);
  if(!re_match("[^\\W]", text + keyword_length)){
    return 0;
  }
  return keyword_length;
}
190 
/*
 * Recognizes a single character: returns 1 when
 * re_match(f->str_1, text) is 0 and 0 for any other result.
 */
int regex_char(symbol_finder* f, char* text){
  return re_match(f->str_1, text) == 0 ? 1 : 0;
}
197 
/*
 * Attaches whitespace/comment symbols to the symbol that follows them
 * and then unlinks them from the list.
 *
 * Pass 1: for every symbol, whitespace_or_comment_before is set to the
 * directly preceding symbol when that symbol is whitespace or a
 * comment, and to NULL otherwise.
 *
 * Pass 2: whitespace and comment symbols are removed from the chain.
 * The walk stops when it reaches symbols->last, so the final symbol is
 * always kept, whatever its type, and symbols->last stays valid.
 *
 * The unlinking walks a pointer to the "next" link itself (starting at
 * &symbols->head), so removing the first element needs no special
 * case and no temporary sentinel node has to be allocated.
 */
void fold_whitespace_and_comments(ycf_symbol_list* symbols){
  ycf_symbol* prev = NULL;
  ycf_symbol* current;
  ycf_symbol** link;
  for(current = symbols->head; current != NULL; current = current->next){
    current->whitespace_or_comment_before = NULL;
    if(prev != NULL && (prev->type == ycf_symbol_type_whitespace ||
                        prev->type == ycf_symbol_type_comment)){
      current->whitespace_or_comment_before = prev;
    }
    prev = current;
  }
  // remove ycf_symbol_type_whitespace and comments from list
  link = &symbols->head;
  while(*link != NULL &&
        *link != symbols->last){
    current = *link;
    if(current->type == ycf_symbol_type_whitespace ||
       current->type == ycf_symbol_type_comment){
      /* Bypass current; link keeps pointing at the same slot. */
      *link = current->next;
    }else {
      link = &current->next;
    }
  }
}
229 
ycf_symbol_list_from_text(char * text)230 ycf_symbol_list ycf_symbol_list_from_text(char* text){
231   int pos = 0;
232   int nr_of_finders = 41;
233   int i;
234   ycf_symbol_list ret = ycf_symbol_list_empty();
235   symbol_finder symbol_finders[] =
236     {
237       {
238         .type = ycf_symbol_type_special_code_start,
239         .str_1 = "/*special_code_start:",
240         .str_2 = "*/",
241         .finder = starts_with_ends_with
242       },
243       {
244         .type = ycf_symbol_type_special_code_end,
245         .str_1 = "/*special_code_end*/",
246         .finder = fixed_string
247       },
248       {
249         .type = ycf_symbol_type_comment,
250         .str_1 = "/*",
251         .str_2 = "*/",
252         .finder = starts_with_ends_with
253       },
254       {
255         .type = ycf_symbol_type_string_literal,
256         .finder = string_litteral_finder
257       },
258       {
259         .type = ycf_symbol_type_macro_define,
260         .finder = macro_define_finder
261       },
262       {
263         .type = ycf_symbol_type_macro_command,
264         .str_1 = "#",
265         .str_2 = "\n",
266         .finder = starts_with_ends_with
267       },
268       {
269         .type = ycf_symbol_type_whitespace,
270         .str_1 = "\\s",
271         .finder = until_no_match
272       },
273       {
274         .type = ycf_symbol_type_void,
275         .str_1 = "void",
276         .finder = fixed_alpha_string
277       },
278       {
279         .type = ycf_symbol_type_static,
280         .str_1 = "static",
281         .finder = fixed_alpha_string
282       },
283       {
284         .type = ycf_symbol_type_inline,
285         .str_1 = "inline",
286         .finder = fixed_alpha_string
287       },
288       {
289         .type = ycf_symbol_type_const,
290         .str_1 = "const",
291         .finder = fixed_alpha_string
292       },
293       {
294         .type = ycf_symbol_type_volatile,
295         .str_1 = "volatile",
296         .finder = fixed_alpha_string
297       },
298       {
299         .type = ycf_symbol_type_return,
300         .str_1 = "return",
301         .finder = fixed_alpha_string
302       },
303       {
304         .type = ycf_symbol_type_if,
305         .str_1 = "if",
306         .finder = fixed_alpha_string
307       },
308       {
309         .type = ycf_symbol_type_else,
310         .str_1 = "else",
311         .finder = fixed_alpha_string
312       },
313       {
314         .type = ycf_symbol_type_goto,
315         .str_1 = "goto",
316         .finder = fixed_alpha_string
317       },
318       {
319         .type = ycf_symbol_type_break,
320         .str_1 = "break",
321         .finder = fixed_alpha_string
322       },
323       {
324         .type = ycf_symbol_type_continue,
325         .str_1 = "continue",
326         .finder = fixed_alpha_string
327       },
328       {
329         .type = ycf_symbol_type_while,
330         .str_1 = "while",
331         .finder = fixed_alpha_string
332       },
333       {
334         .type = ycf_symbol_type_do,
335         .str_1 = "do",
336         .finder = fixed_alpha_string
337       },
338       {
339         .type = ycf_symbol_type_for,
340         .str_1 = "for",
341         .finder = fixed_alpha_string
342       },
343       {
344         .type = ycf_symbol_type_switch,
345         .str_1 = "switch",
346         .finder = fixed_alpha_string
347       },
348       {
349         .type = ycf_symbol_type_identifier,
350         .str_1 = "[a-zA-Z]",
351         .str_2 = "\\w",
352         .finder = starts_with_until_no_match
353       },
354       {
355         .type = ycf_symbol_type_number,
356         .str_1 = "\\d",
357         .finder = until_no_match
358       },
359       {
360         .type = ycf_symbol_type_open_parenthesis,
361         .str_1 = "(",
362         .finder = fixed_string
363       },
364       {
365         .type = ycf_symbol_type_end_parenthesis,
366         .str_1 = ")",
367         .finder = fixed_string
368       },
369       {
370         .type = ycf_symbol_type_open_curly_brace,
371         .str_1 = "{",
372         .finder = fixed_string
373       },
374       {
375         .type = ycf_symbol_type_end_curly_brace,
376         .str_1 = "}",
377         .finder = fixed_string
378       },
379       {
380         .type = ycf_symbol_type_open_square_bracket,
381         .str_1 = "[",
382         .finder = fixed_string
383       },
384       {
385         .type = ycf_symbol_type_end_square_bracket,
386         .str_1 = "]",
387         .finder = fixed_string
388       },
389       {
390         .type = ycf_symbol_type_equal_sign,
391         .str_1 = "=",
392         .finder = fixed_string
393       },
394       {
395         .type = ycf_symbol_type_not_equal_sign,
396         .str_1 = "!=",
397         .finder = fixed_string
398       },
399       {
400         .type = ycf_symbol_type_equal_sign,
401         .str_1 = "==",
402         .finder = fixed_string
403       },
404       {
405         .type = ycf_symbol_type_star,
406         .str_1 = "*",
407         .finder = fixed_string
408       },
409       {
410         .type = ycf_symbol_type_neg,
411         .str_1 = "!",
412         .finder = fixed_string
413       },
414       {
415         .type = ycf_symbol_type_semicolon,
416         .str_1 = ";",
417         .finder = fixed_string
418       },
419       {
420         .type = ycf_symbol_type_comma,
421         .str_1 = ",",
422         .finder = fixed_string
423       },
424       {
425         .type = ycf_symbol_type_period,
426         .str_1 = ".",
427         .finder = fixed_string
428       },
429       {
430         .type = ycf_symbol_type_pointer_field_access,
431         .str_1 = "->",
432         .finder = fixed_string
433       },
434       {
435         .type = ycf_symbol_type_something_else,
436         .str_1 = ".",
437         .finder = regex_char
438       }
439     };
440   while(text[pos] != 0){
441     int last_pos = pos;
442     for(i = 0; i < nr_of_finders; i++) {
443       symbol_finder f = symbol_finders[i];
444       int stop = f.finder(&f, &text[pos]);
445       if(stop){
446         ycf_symbol* s = ycf_malloc(sizeof(ycf_symbol));
447         s->type = f.type;
448         s->source = text;
449         s->start = pos;
450         s->stop = pos + stop;
451         s->next = NULL;
452         ycf_symbol_list_append(&ret, s);
453         pos = s->stop;
454         break;
455       }
456     }
457     if (last_pos == pos){
458       printf("Lexer: NOTHING MATCH Stuck at: \n%s\n", &text[pos]);
459       exit(1);
460     }
461   }
462   fold_whitespace_and_comments(&ret);
463   return ret;
464 }
465 
ycf_symbol_list_print(char * text)466 void ycf_symbol_list_print(char* text){
467   ycf_symbol_list symbols = ycf_symbol_list_from_text(text);
468   ycf_symbol* s = symbols.head;
469   while(s != NULL){
470     printf("TYPE %s, START=%d, STOP=%d\n",
471            get_symbol_type_text(s->type),
472            s->start,
473            s->stop);
474     s = s->next;
475   }
476   printf("||||| END OF SYMBOLS\n");
477 }
478