1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright Ericsson AB and Kjell Winblad 2019. All Rights Reserved.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * %CopyrightEnd%
19 */
20
21 /*
22 * Author: Kjell Winblad
23 */
24
25
26 #include "lib/tiny_regex_c/re.h"
27 #include "ycf_yield_fun.h"
28 #include "ycf_utils.h"
29
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33
34
35
/*
 * Compares the text covered by a symbol with a NUL-terminated string.
 *
 * The symbol's text is the byte range [symbol->start, symbol->stop) of
 * symbol->source.
 *
 * Returns 1 when that range has the same length as `str` and the same
 * contents, and 0 otherwise.
 */
int ycf_symbol_is_text_eq(ycf_symbol* symbol, char* str){
  unsigned long text_length = symbol->stop - symbol->start;
  /* Different lengths can never be equal; skip the byte comparison. */
  if(strlen(str) != text_length){
    return 0;
  }
  return strncmp(str, &symbol->source[symbol->start], text_length) == 0;
}
42
/*
 * Returns a freshly allocated (ycf_malloc), NUL-terminated copy of the
 * source text that begins at the start of `s1` and ends at the stop
 * position of `s2`. The bytes are read from s1->source.
 */
char* ycf_symbol_text_between(ycf_symbol* s1, ycf_symbol* s2){
  int length = s2->stop - s1->start;
  char* copy = ycf_malloc(length + 1);
  /* strncpy only writes `length` bytes, so terminate explicitly. */
  copy[length] = 0;
  strncpy(copy, &s1->source[s1->start], length);
  return copy;
}
50
/*
 * Maps a symbol type to the spelling of its enum constant, for use in
 * diagnostic output.
 *
 * Returns "non_existing_symbol?" for a value that is not handled by the
 * switch below. The returned string is a literal and must not be modified
 * or freed.
 */
char* get_symbol_type_text(ycf_symbol_type type){
  char* name = "non_existing_symbol?";
  switch(type) {
  case ycf_symbol_type_comment:
    name = "ycf_symbol_type_comment";
    break;
  case ycf_symbol_type_string_literal:
    name = "ycf_symbol_type_string_literal";
    break;
  case ycf_symbol_type_macro_define:
    name = "ycf_symbol_type_macro_define";
    break;
  case ycf_symbol_type_macro_command:
    name = "ycf_symbol_type_macro_command";
    break;
  case ycf_symbol_type_whitespace:
    name = "ycf_symbol_type_whitespace";
    break;
  case ycf_symbol_type_identifier:
    name = "ycf_symbol_type_identifier";
    break;
  case ycf_symbol_type_number:
    name = "ycf_symbol_type_number";
    break;
  case ycf_symbol_type_star:
    name = "ycf_symbol_type_star";
    break;
  case ycf_symbol_type_neg:
    name = "ycf_symbol_type_neg";
    break;
  case ycf_symbol_type_equal_equal_sign:
    name = "ycf_symbol_type_equal_equal_sign";
    break;
  case ycf_symbol_type_not_equal_sign:
    name = "ycf_symbol_type_not_equal_sign";
    break;
  case ycf_symbol_type_open_parenthesis:
    name = "ycf_symbol_type_open_parenthesis";
    break;
  case ycf_symbol_type_end_parenthesis:
    name = "ycf_symbol_type_end_parenthesis";
    break;
  case ycf_symbol_type_open_curly_brace:
    name = "ycf_symbol_type_open_curly_brace";
    break;
  case ycf_symbol_type_end_curly_brace:
    name = "ycf_symbol_type_end_curly_brace";
    break;
  case ycf_symbol_type_open_square_bracket:
    name = "ycf_symbol_type_open_square_bracket";
    break;
  case ycf_symbol_type_end_square_bracket:
    name = "ycf_symbol_type_end_square_bracket";
    break;
  case ycf_symbol_type_equal_sign:
    name = "ycf_symbol_type_equal_sign";
    break;
  case ycf_symbol_type_semicolon:
    name = "ycf_symbol_type_semicolon";
    break;
  case ycf_symbol_type_comma:
    name = "ycf_symbol_type_comma";
    break;
  case ycf_symbol_type_pointer_field_access:
    name = "ycf_symbol_type_pointer_field_access";
    break;
  case ycf_symbol_type_period:
    name = "ycf_symbol_type_period";
    break;
  case ycf_symbol_type_const:
    name = "ycf_symbol_type_const";
    break;
  case ycf_symbol_type_void:
    name = "ycf_symbol_type_void";
    break;
  case ycf_symbol_type_volatile:
    name = "ycf_symbol_type_volatile";
    break;
  case ycf_symbol_type_static:
    name = "ycf_symbol_type_static";
    break;
  case ycf_symbol_type_inline:
    name = "ycf_symbol_type_inline";
    break;
  case ycf_symbol_type_return:
    name = "ycf_symbol_type_return";
    break;
  case ycf_symbol_type_if:
    name = "ycf_symbol_type_if";
    break;
  case ycf_symbol_type_else:
    name = "ycf_symbol_type_else";
    break;
  case ycf_symbol_type_goto:
    name = "ycf_symbol_type_goto";
    break;
  case ycf_symbol_type_break:
    name = "ycf_symbol_type_break";
    break;
  case ycf_symbol_type_while:
    name = "ycf_symbol_type_while";
    break;
  case ycf_symbol_type_do:
    name = "ycf_symbol_type_do";
    break;
  case ycf_symbol_type_for:
    name = "ycf_symbol_type_for";
    break;
  case ycf_symbol_type_switch:
    name = "ycf_symbol_type_switch";
    break;
  case ycf_symbol_type_continue:
    name = "ycf_symbol_type_continue";
    break;
  case ycf_symbol_type_something_else:
    name = "ycf_symbol_type_something_else";
    break;
  case ycf_symbol_type_special_code_start:
    name = "ycf_symbol_type_special_code_start";
    break;
  case ycf_symbol_type_special_code_end:
    name = "ycf_symbol_type_special_code_end";
    break;
  }
  return name;
}
96
/*
 * One lexer rule: a matcher callback together with the symbol type that is
 * produced when the callback matches and the arguments the callback needs.
 */
typedef struct symbol_finder {
  /* Matcher callback. It is called with the rule itself and the text at the
     current lexer position, and returns the number of characters the symbol
     occupies, or 0 when the rule does not match at that position. */
  int (*finder)(struct symbol_finder*,char*);
  /* Type assigned to the symbol created when `finder` matches. */
  ycf_symbol_type type;
  /* Not read or written by any code visible in this file. */
  int length;
  /* First rule argument. Depending on the callback it is used as a literal
     prefix (e.g. fixed_string) or as a regular expression (e.g.
     until_no_match). */
  char *str_1;
  /* Second rule argument. Depending on the callback it is used as a literal
     terminator (starts_with_ends_with) or as a regular expression
     (starts_with_until_no_match). Left unset by rules that do not need it. */
  char *str_2;
} symbol_finder;
104
/*
 * Tells whether the NUL-terminated string `str` begins with `prefix`.
 *
 * Returns 1 if it does and 0 otherwise. An empty prefix matches every
 * string.
 */
int starts_with(char *str, char *prefix)
{
  size_t prefix_length = strlen(prefix);
  /* strncmp stops at the first NUL, so a `str` shorter than the prefix is
     simply reported as a mismatch. */
  if(strncmp(str, prefix, prefix_length) != 0){
    return 0;
  }
  return 1;
}
109
/*
 * Finder that consumes characters for as long as the regular expression
 * f->str_1 matches at the very start of the remaining text (re_match
 * returning 0).
 *
 * Returns the number of characters consumed; 0 means the rule does not
 * match at `text`.
 */
int until_no_match(symbol_finder* f, char* text){
  int length;
  for(length = 0; re_match(f->str_1, &text[length]) == 0; length++){
    /* Nothing to do: the loop header advances one character at a time. */
  }
  return length;
}
117
/*
 * Finder for C string literals.
 *
 * Returns 0 when `text` does not start with a double quote. Otherwise
 * returns the length of the literal including both quotes. If the closing
 * quote is missing before the end of the text, a message is printed and the
 * process exits with status 1.
 *
 * The literal is scanned by hand according to the grammar
 *
 *     \"(\\.|[^"\\])*\"
 *
 * The previous implementation tried to express the `\\.` alternative with
 * the regex "\\." which, after C string unescaping, is `\.` and only matches
 * a literal period. As a result an escaped quote (\") inside a literal was
 * taken for the closing quote. Here a backslash and the character following
 * it are always skipped as a pair.
 */
int string_litteral_finder(symbol_finder* f, char* text){
  int pos;
  if(text[0] != '"'){
    return 0;
  }
  pos = 1;
  while(text[pos] != '\0' && text[pos] != '"'){
    if(text[pos] == '\\' && text[pos + 1] != '\0'){
      /* Escape sequence: the escaped character can never close the
         literal, not even when it is a double quote. */
      pos = pos + 2;
    }else{
      pos++;
    }
  }
  if(text[pos] != '"'){
    /* Reached the end of the text without finding the closing quote. */
    printf("Broken string litteral\n");
    exit(1);
  }
  return pos + 1;
}
136
/*
 * Finder for "#define" directives, including definitions continued over
 * several lines with a trailing backslash.
 *
 * Returns 0 when `text` does not start with "#define". Otherwise returns the
 * number of characters up to, but not including, the newline that ends the
 * directive. A backslash immediately followed by a newline is skipped and
 * does not end the directive.
 *
 * The scan also stops at the end of the text, so a directive on the last
 * line of a file that lacks a trailing newline no longer makes the loop read
 * past the NUL terminator.
 */
int macro_define_finder(symbol_finder* f, char* text){
  int pos;
  if(!starts_with(text, "#define")){
    return 0;
  }
  pos = strlen("#define");
  while(text[pos] != '\0'){
    if(starts_with(&(text[pos]), "\\\n")){
      /* Line continuation: the directive goes on at the next line. */
      pos = pos + 2;
    } else if(text[pos] == '\n'){
      break;
    } else {
      pos++;
    }
  }
  return pos;
}
153
154
/*
 * Finder with separate patterns for the first and the following characters.
 *
 * The rule applies only if the regular expression f->str_1 matches at the
 * start of `text`. In that case characters are consumed, beginning with the
 * first one, for as long as f->str_2 matches at the start of the remaining
 * text.
 *
 * Returns the number of characters consumed, or 0 when the rule does not
 * apply.
 */
int starts_with_until_no_match(symbol_finder* f, char* text){
  int length = 0;
  if(re_match(f->str_1, text) != 0){
    return 0;
  }
  while(re_match(f->str_2, &text[length]) == 0){
    length++;
  }
  return length;
}
164
/*
 * Finder for symbols delimited by a literal prefix (f->str_1) and a literal
 * terminator (f->str_2), e.g. a comment or a preprocessor line.
 *
 * Returns 0 when `text` does not start with the prefix. Otherwise returns
 * the length of the symbol including the terminator. If the text ends before
 * the terminator is found, the length of the remaining text is returned, so
 * the rest of the input becomes part of the symbol.
 *
 * Differences from the previous implementation:
 *  - The search stops at the NUL terminator instead of reading past the end
 *    of the buffer when the terminator is missing (an unterminated comment,
 *    or a preprocessor line at the end of a file without a final newline).
 *  - The terminator is searched for after the whole prefix rather than from
 *    index 1, so the prefix and the terminator cannot overlap. Previously
 *    the text `/` `*` `/` was accepted as a complete comment.
 */
int starts_with_ends_with(symbol_finder* f, char* text){
  int pos;
  if(!starts_with(text, f->str_1)){
    return 0;
  }
  pos = strlen(f->str_1);
  while(text[pos] != '\0'){
    if(starts_with(&(text[pos]), f->str_2)){
      return pos + strlen(f->str_2);
    }
    pos++;
  }
  /* Terminator not found: consume everything up to the end of the text. */
  return pos;
}
175
/*
 * Finder for a fixed piece of text: matches when `text` starts with the
 * literal f->str_1.
 *
 * Returns the length of f->str_1 on a match and 0 otherwise.
 */
int fixed_string(symbol_finder* f, char* text){
  return starts_with(text, f->str_1) ? strlen(f->str_1) : 0;
}
182
/*
 * Finder for keywords: matches when `text` starts with the literal
 * f->str_1 and the check on the text that follows it succeeds.
 *
 * Note on that check: elsewhere in this file `re_match(...) == 0` is used to
 * mean "the pattern matches at the very start of the text". Here the result
 * of re_match is used as a truth value instead, so the keyword is rejected
 * exactly when the pattern "[^\\W]" matches right after it and accepted for
 * any other result. Presumably the pattern stands for a word character,
 * which keeps a keyword from matching a prefix of a longer identifier --
 * confirm against the regex library.
 *
 * Returns the length of f->str_1 on a match and 0 otherwise.
 */
int fixed_alpha_string(symbol_finder* f, char* text){
  size_t keyword_length;
  if(!starts_with(text, f->str_1)){
    return 0;
  }
  keyword_length = strlen(f->str_1);
  if(re_match("[^\\W]", &text[keyword_length]) == 0){
    return 0;
  }
  return keyword_length;
}
190
/*
 * Finder for a single character: matches when the regular expression
 * f->str_1 matches at the very start of `text` (re_match returning 0).
 *
 * Returns 1 on a match and 0 otherwise.
 */
int regex_char(symbol_finder* f, char* text){
  return re_match(f->str_1, text) == 0;
}
197
/*
 * Removes whitespace and comment symbols from a symbol list while keeping
 * them reachable from the symbols that follow them.
 *
 * Pass 1 sets whitespace_or_comment_before of every symbol to the symbol
 * directly in front of it if that one is whitespace or a comment, and to
 * NULL otherwise.
 *
 * Pass 2 unlinks whitespace and comment symbols from the list. The walk
 * stops when it reaches symbols->last, so the last symbol is never unlinked
 * and symbols->last stays valid. Unlinked symbols are not freed, because
 * they are still referenced through whitespace_or_comment_before.
 *
 * The unlinking is done through a pointer to the link being examined
 * (initially symbols->head). The earlier implementation allocated a
 * temporary dummy head node with ycf_malloc for this and never released it;
 * the resulting list is the same, without the allocation.
 */
void fold_whitespace_and_comments(ycf_symbol_list* symbols){
  ycf_symbol* prev = NULL;
  ycf_symbol* current;
  ycf_symbol** link;
  for(current = symbols->head; current != NULL; current = current->next){
    if(prev != NULL && (prev->type == ycf_symbol_type_whitespace ||
                        prev->type == ycf_symbol_type_comment)){
      current->whitespace_or_comment_before = prev;
    }else{
      current->whitespace_or_comment_before = NULL;
    }
    prev = current;
  }
  /* remove ycf_symbol_type_whitespace and comments from list */
  link = &symbols->head;
  while(*link != NULL && *link != symbols->last){
    current = *link;
    if(current->type == ycf_symbol_type_whitespace ||
       current->type == ycf_symbol_type_comment){
      /* Bypass the symbol; its own next pointer is left untouched. */
      *link = current->next;
    }else{
      link = &current->next;
    }
  }
}
229
ycf_symbol_list_from_text(char * text)230 ycf_symbol_list ycf_symbol_list_from_text(char* text){
231 int pos = 0;
232 int nr_of_finders = 41;
233 int i;
234 ycf_symbol_list ret = ycf_symbol_list_empty();
235 symbol_finder symbol_finders[] =
236 {
237 {
238 .type = ycf_symbol_type_special_code_start,
239 .str_1 = "/*special_code_start:",
240 .str_2 = "*/",
241 .finder = starts_with_ends_with
242 },
243 {
244 .type = ycf_symbol_type_special_code_end,
245 .str_1 = "/*special_code_end*/",
246 .finder = fixed_string
247 },
248 {
249 .type = ycf_symbol_type_comment,
250 .str_1 = "/*",
251 .str_2 = "*/",
252 .finder = starts_with_ends_with
253 },
254 {
255 .type = ycf_symbol_type_string_literal,
256 .finder = string_litteral_finder
257 },
258 {
259 .type = ycf_symbol_type_macro_define,
260 .finder = macro_define_finder
261 },
262 {
263 .type = ycf_symbol_type_macro_command,
264 .str_1 = "#",
265 .str_2 = "\n",
266 .finder = starts_with_ends_with
267 },
268 {
269 .type = ycf_symbol_type_whitespace,
270 .str_1 = "\\s",
271 .finder = until_no_match
272 },
273 {
274 .type = ycf_symbol_type_void,
275 .str_1 = "void",
276 .finder = fixed_alpha_string
277 },
278 {
279 .type = ycf_symbol_type_static,
280 .str_1 = "static",
281 .finder = fixed_alpha_string
282 },
283 {
284 .type = ycf_symbol_type_inline,
285 .str_1 = "inline",
286 .finder = fixed_alpha_string
287 },
288 {
289 .type = ycf_symbol_type_const,
290 .str_1 = "const",
291 .finder = fixed_alpha_string
292 },
293 {
294 .type = ycf_symbol_type_volatile,
295 .str_1 = "volatile",
296 .finder = fixed_alpha_string
297 },
298 {
299 .type = ycf_symbol_type_return,
300 .str_1 = "return",
301 .finder = fixed_alpha_string
302 },
303 {
304 .type = ycf_symbol_type_if,
305 .str_1 = "if",
306 .finder = fixed_alpha_string
307 },
308 {
309 .type = ycf_symbol_type_else,
310 .str_1 = "else",
311 .finder = fixed_alpha_string
312 },
313 {
314 .type = ycf_symbol_type_goto,
315 .str_1 = "goto",
316 .finder = fixed_alpha_string
317 },
318 {
319 .type = ycf_symbol_type_break,
320 .str_1 = "break",
321 .finder = fixed_alpha_string
322 },
323 {
324 .type = ycf_symbol_type_continue,
325 .str_1 = "continue",
326 .finder = fixed_alpha_string
327 },
328 {
329 .type = ycf_symbol_type_while,
330 .str_1 = "while",
331 .finder = fixed_alpha_string
332 },
333 {
334 .type = ycf_symbol_type_do,
335 .str_1 = "do",
336 .finder = fixed_alpha_string
337 },
338 {
339 .type = ycf_symbol_type_for,
340 .str_1 = "for",
341 .finder = fixed_alpha_string
342 },
343 {
344 .type = ycf_symbol_type_switch,
345 .str_1 = "switch",
346 .finder = fixed_alpha_string
347 },
348 {
349 .type = ycf_symbol_type_identifier,
350 .str_1 = "[a-zA-Z]",
351 .str_2 = "\\w",
352 .finder = starts_with_until_no_match
353 },
354 {
355 .type = ycf_symbol_type_number,
356 .str_1 = "\\d",
357 .finder = until_no_match
358 },
359 {
360 .type = ycf_symbol_type_open_parenthesis,
361 .str_1 = "(",
362 .finder = fixed_string
363 },
364 {
365 .type = ycf_symbol_type_end_parenthesis,
366 .str_1 = ")",
367 .finder = fixed_string
368 },
369 {
370 .type = ycf_symbol_type_open_curly_brace,
371 .str_1 = "{",
372 .finder = fixed_string
373 },
374 {
375 .type = ycf_symbol_type_end_curly_brace,
376 .str_1 = "}",
377 .finder = fixed_string
378 },
379 {
380 .type = ycf_symbol_type_open_square_bracket,
381 .str_1 = "[",
382 .finder = fixed_string
383 },
384 {
385 .type = ycf_symbol_type_end_square_bracket,
386 .str_1 = "]",
387 .finder = fixed_string
388 },
389 {
390 .type = ycf_symbol_type_equal_sign,
391 .str_1 = "=",
392 .finder = fixed_string
393 },
394 {
395 .type = ycf_symbol_type_not_equal_sign,
396 .str_1 = "!=",
397 .finder = fixed_string
398 },
399 {
400 .type = ycf_symbol_type_equal_sign,
401 .str_1 = "==",
402 .finder = fixed_string
403 },
404 {
405 .type = ycf_symbol_type_star,
406 .str_1 = "*",
407 .finder = fixed_string
408 },
409 {
410 .type = ycf_symbol_type_neg,
411 .str_1 = "!",
412 .finder = fixed_string
413 },
414 {
415 .type = ycf_symbol_type_semicolon,
416 .str_1 = ";",
417 .finder = fixed_string
418 },
419 {
420 .type = ycf_symbol_type_comma,
421 .str_1 = ",",
422 .finder = fixed_string
423 },
424 {
425 .type = ycf_symbol_type_period,
426 .str_1 = ".",
427 .finder = fixed_string
428 },
429 {
430 .type = ycf_symbol_type_pointer_field_access,
431 .str_1 = "->",
432 .finder = fixed_string
433 },
434 {
435 .type = ycf_symbol_type_something_else,
436 .str_1 = ".",
437 .finder = regex_char
438 }
439 };
440 while(text[pos] != 0){
441 int last_pos = pos;
442 for(i = 0; i < nr_of_finders; i++) {
443 symbol_finder f = symbol_finders[i];
444 int stop = f.finder(&f, &text[pos]);
445 if(stop){
446 ycf_symbol* s = ycf_malloc(sizeof(ycf_symbol));
447 s->type = f.type;
448 s->source = text;
449 s->start = pos;
450 s->stop = pos + stop;
451 s->next = NULL;
452 ycf_symbol_list_append(&ret, s);
453 pos = s->stop;
454 break;
455 }
456 }
457 if (last_pos == pos){
458 printf("Lexer: NOTHING MATCH Stuck at: \n%s\n", &text[pos]);
459 exit(1);
460 }
461 }
462 fold_whitespace_and_comments(&ret);
463 return ret;
464 }
465
ycf_symbol_list_print(char * text)466 void ycf_symbol_list_print(char* text){
467 ycf_symbol_list symbols = ycf_symbol_list_from_text(text);
468 ycf_symbol* s = symbols.head;
469 while(s != NULL){
470 printf("TYPE %s, START=%d, STOP=%d\n",
471 get_symbol_type_text(s->type),
472 s->start,
473 s->stop);
474 s = s->next;
475 }
476 printf("||||| END OF SYMBOLS\n");
477 }
478